diff -Nru gcc-4.6-4.6.2/debian/README.gnat gcc-4.6-4.6.4/debian/README.gnat --- gcc-4.6-4.6.2/debian/README.gnat 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/README.gnat 2013-04-14 23:00:34.000000000 +0000 @@ -1,7 +1,7 @@ If you want to develop Ada programs and libraries on Debian, please read the Debian Policy for Ada: -http://www.ada-france.org/debian/debian-ada-policy.html +http://people.debian.org/~lbrenta/debian-ada-policy.html The default Ada compiler is and always will be the package `gnat'. Debian contains many programs and libraries compiled with it, which diff -Nru gcc-4.6-4.6.2/debian/changelog gcc-4.6-4.6.4/debian/changelog --- gcc-4.6-4.6.2/debian/changelog 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/changelog 2013-04-14 23:00:34.000000000 +0000 @@ -1,14 +1,567 @@ -gcc-4.6 (4.6.2-10ubuntu1~10.04.2) lucid; urgency=low +gcc-4.6 (4.6.4-1ubuntu1~10.04) lucid; urgency=low - * Fix amd64 build. + * Build for lucid. - -- Matthias Klose Mon, 09 Jan 2012 12:46:42 +0100 + -- Matthias Klose Mon, 15 Apr 2013 00:34:24 +0200 -gcc-4.6 (4.6.2-10ubuntu1~10.04.1) lucid; urgency=low +gcc-4.6 (4.6.4-1ubuntu1) raring; urgency=low - * Upload to the ubuntu-toolchain-r test PPA. + * Merge with Debian; remaining changes: + - Build from the upstream source. - -- Matthias Klose Sat, 07 Jan 2012 22:14:18 +0100 + -- Matthias Klose Fri, 12 Apr 2013 14:18:06 +0200 + +gcc-4.6 (4.6.4-1) experimental; urgency=low + + * GCC 4.6.4 release. + * Update the Linaro support to the 4.6-2013.04 release. + + -- Matthias Klose Fri, 12 Apr 2013 13:57:03 +0200 + +gcc-4.6 (4.6.3-16ubuntu1) raring; urgency=low + + * Merge with Debian; remaining changes: + - Build from the upstream source. + + -- Matthias Klose Fri, 05 Apr 2013 16:18:23 +0200 + +gcc-4.6 (4.6.3-16) experimental; urgency=low + + * Update to SVN 20130405 (r197516) from the gcc-4_6-branch (4.6.4 release + candidate 1). 
+ - Fix PR target/56114 (x86), PR tree-optimization/55755, + PR rtl-optimization/56023, PR tree-optimization/55264, + PR fortran/55072, PR bootstrap/55571, PR rtl-optimization/56275, + PR c++/56247, PR fortran/55852, PR fortran/50627, + PR fortran/56054, PR other/54620, PR target/39064, + PR middle-end/45472, PR middle-end/56461, PR middle-end/55889, + PR target/56560 (x86), PR bootstrap/56258, + PR bootstrap/56258, PR c++/56403, PR fortran/56615, + PR fortran/56575, PR fortran/55362, PR fortran/56385, PR fortran/56318, + PR middle-end/52547, PR tree-optimization/56539, PR middle-end/56015, + PR tree-optimization/56098, PR tree-optimization/55921, PR c/54363, + PR middle-end/54486, PR debug/53174, PR middle-end/52547, + PR tree-optimization/52445, PR c++/56239, PR target/56771 (ARM), + PR rtl-optimization/48308, PR rtl-optimization/54472, + PR tree-optimization/48189, PR fortran/56737, PR fortran/56737, + PR fortran/56735, PR other/43620. + * Backport PR rtl-optimization/52573 from trunk, apply for m68k only. + Closes: #698380. + * Allow building with cloog-0.16 / ppl-1.0. + * Update the Linaro support to the 4.6-2013.02 release. + + -- Matthias Klose Fri, 05 Apr 2013 16:12:56 +0200 + +gcc-4.6 (4.6.3-15ubuntu4) raring; urgency=low + + * Update to SVN 20130403 (r197459) from the gcc-4_6-branch. + - Fix PR middle-end/45472, PR middle-end/56461, PR middle-end/55889, + PR middle-end/56077, PR target/56560 (x86), PR bootstrap/56258, + PR bootstrap/56258, PR c++/56403, PR fortran/56615, + PR fortran/56575, PR fortran/55362, PR fortran/56385, PR fortran/56318, + PR middle-end/52547, PR tree-optimization/56539, PR middle-end/56015, + PR tree-optimization/56098, PR tree-optimization/55921, PR c/54363, + PR middle-end/54486, PR debug/53174, PR middle-end/52547, + PR tree-optimization/52445, PR c++/56239, PR target/56771 (ARM). 
+ + -- Matthias Klose Thu, 04 Apr 2013 00:10:51 +0200 + +gcc-4.6 (4.6.3-15ubuntu3) raring; urgency=low + + * Update to SVN 20130214 (r195576) from the gcc-4_6-branch. + - Fix PR bootstrap/55571, PR rtl-optimization/56275, + PR c++/56247, PR fortran/55852, PR fortran/50627, + PR fortran/56054, PR other/54620, PR target/39064. + * Update the Linaro support to the 4.6-2013.02 release. + + -- Matthias Klose Thu, 14 Feb 2013 20:53:11 +0100 + +gcc-4.6 (4.6.3-15ubuntu2) raring; urgency=low + + * Update to SVN 20130130 (r195576) from the gcc-4_6-branch. + - Fix PR target/56114 (x86), PR tree-optimization/55755, + PR rtl-optimization/56023, PR tree-optimization/55264, + PR fortran/55072. + * Allow building with cloog-0.16 / ppl-1.0. + + -- Matthias Klose Wed, 30 Jan 2013 16:15:58 +0100 + +gcc-4.6 (4.6.3-15ubuntu1) raring; urgency=low + + * Merge with Debian; remaining changes: + - Build from the upstream source. + + -- Matthias Klose Tue, 15 Jan 2013 00:10:43 +0100 + +gcc-4.6 (4.6.3-15) unstable; urgency=low + + * Update to SVN 20130114 (r195168) from the gcc-4_6-branch. + - Fix PR bootstrap/55571, PR target/53789, PR c++/55804, + PR tree-optimization/55355, PR target/54121 (sparc), PR c++/55032, + PR middle-end/50283, PR target/55195, PR libgcc/48076, PR c++/55877, + PR c++/55032, PR c++/55245, PR c++/54883, PR c++/55249, PR c++/53862, + PR c++/51662, PR target/53912 (mingw), PR ada/54614, PR fortran/42769, + PR fortran/45836, PR fortran/45900, PR fortran/55827, PR fortran/55618. + - Backport multiarch patches, including powerpcspu fix. Closes: #695654. + + [ Matthias Klose ] + * For cross builds, fix libc6 dependencies for non-default multilib packages. + * Don't ship libiberty.a in gcc-4.6-hppa64. Closes: #659556. + + [ Thorsten Glaser ] + * libffi: Update the libffi-m68k patch from upstream and apply + further fixes from upstream as well as upstreamed from #660525. 
+ * Ada: debian/patches/ada-libgnatprj.diff: Add missing wildcard + to the m68k-*-linux* target (fixes building shared libraries). + * Ada: Enable on m68k Linux. + * Backport: trunk r187181, r187234, r187714 to speed up genattrtab, + apply for m68k only. + * Backport: atomic builtin backend code for Linux/m68k from trunk. + * PR52714: Add proposed fix (use gcc-4.5 version of PR45695 fix), + apply for m68k only. + * m68k: PR40134: Add t-slibgcc-libgcc for m68k-linux. + * Cross: When building a cross-compiler, re-enable building the + runtime packages that were disabled because gcc-4.7 provides + them for the main archive to keep them self-contained. + * mint-m68k: Add the FreeMiNT patches from Vincent Rivière to + enable building DEB_STAGE=stage1 cross-compilers, which are + needed for developing bootloaders on m68k. + * Closes: #694112. + + -- Matthias Klose Mon, 14 Jan 2013 19:37:53 +0100 + +gcc-4.6 (4.6.3-14) unstable; urgency=low + + * Update to SVN 20121121 (r193837) from the gcc-4_6-branch. + - Fix PR fortran/55314. + * Make explicit --{en,dis}able-multiarch options effective (Thorsten Glaser). + * Fix multiarch name for powerpc non-biarch builds. + * Fix 64bit C++ header installation on s390. Closes: #694482. + + -- Matthias Klose Tue, 27 Nov 2012 06:02:15 +0100 + +gcc-4.6 (4.6.3-13ubuntu1) raring; urgency=low + + * Merge with Debian; remaining changes: + - Build from the upstream source. + + -- Matthias Klose Thu, 22 Nov 2012 04:25:03 +0100 + +gcc-4.6 (4.6.3-13) unstable; urgency=low + + * Update to SVN 20121121 (r193700) from the gcc-4_6-branch. + - Fix PR middle-end/55219 (ice on valid), PR rtl-optimization/48374 (ice + on valid), PR rtl-optimization/53701 (ice on valid), PR middle-end/54945, + PR target/54950 (m32c), PR libfortran/54736 (wrong code). + * Split multiarch patches into local and upstreamed parts. + * Update symbols files for powerpcspe (Roland Stigge). Closes: #693326. + * Clean up libstdc++ man pages. Closes: #692446. 
+ + -- Matthias Klose Thu, 22 Nov 2012 02:28:41 +0100 + +gcc-4.6 (4.6.3-12) unstable; urgency=low + + * Update to SVN 20121011 (r192379) from the gcc-4_6-branch. + - PARISC fix, test case fix. + + [ Matthias Klose ] + * Merge from gnat-4.6 4.6.3-6: + * debian/patches/ada-symbolic-tracebacks.diff (src/gcc/ada/tracebak.c): + Use the GCC stack unwinder on GNU/kFreeBSD too. Closes: #685559. + * debian/patches/gcc_ada_gcc-interface_Makefile.in.diff: link + libgnarl.so.4.6 with librt on GNU/Hurd. Closes: #685561. + * debian/patches/ada-kfreebsd-gnu.diff: likewise on GNU/kFreeBSD. + Closes: #685562. + * debian/patches/ada-symbolic-tracebacks.diff (src/gcc/ada/tracebak.c): + new hunk. Use the GCC stack unwinder on GNU/Hurd. Closes: #681998. + * debian/patches/ada-link-lib.diff: do not use parallel makes to build + the GNAT tools. Closes: #667184. + + [ Thorsten Glaser ] + * Actually apply the libffi-m68k patch; update it from upstream + * Apply further m68k platform bugfixes (pr47955, m68k-fp-cmp-zero) + * Revert pr45144 fix on m68k only, it results in miscompilation + * Apply initial m68k-ada support patch; enable Ada for m68k + * debian/patches/ada-libgnatprj.diff: add support for m68k-*-linux. + + [ Steven Chamberlain ] + * Fix kfreebsd build issues. + + -- Matthias Klose Fri, 12 Oct 2012 00:19:41 +0200 + +gcc-4.6 (4.6.3-11) unstable; urgency=low + + * Update to SVN 20121006 (r192156) from the gcc-4_6-branch. + - Fix PR other/43620, PR middle-end/54638, PR libstdc++/54228, + PR tree-optimization/33763 (closes: #672411), PR target/54785, + PR target/54741. + * On ARM, don't warn anymore that 4.4 has changed the `va_list' mangling, + taken from the trunk. + * Don't run the libstdc++ tests on mipsel, times out on the buildds. + + -- Matthias Klose Sat, 06 Oct 2012 14:05:54 +0200 + +gcc-4.6 (4.6.3-10ubuntu1) quantal; urgency=low + + * Merge with Debian. 
+ + -- Matthias Klose Tue, 18 Sep 2012 22:44:00 +0200 + +gcc-4.6 (4.6.3-10) unstable; urgency=low + + * Update to SVN 20120918 (r191439) from the gcc-4_6-branch. + - Fix PR c/54552 (ice on valid), PR c/54103 (ice on valid), + PR target/54536 (AVR), PR middle-end/54515 (ice on valid), + PR target/45070 (ARM, wrong code), PR target/54220 (AVR), + PR driver/54335, PR rtl-optimization/54369 (mips, wrong code), + PR c++/54511 (ice on valid), PR fortran/54225 (ice on invalid), + PR fortran/53306 (ice on invalid), PR fortran/54556 (wrong code), + PR fortran/54208 (rejects valid). + + [ Nobuhiro Iwamatsu ] + * Remove sh4-enable-ieee.diff, -mieee enabled by default. Closes: #685974. + + [ Matthias Klose ] + * Fix PR tree-optimization/51987, backport from the trunk, Linaro only + (Matthew Gretton-Dann). LP: #1029454. + + -- Matthias Klose Tue, 18 Sep 2012 22:40:18 +0200 + +gcc-4.6 (4.6.3-9ubuntu1) quantal; urgency=low + + * Merge with Debian. + + -- Matthias Klose Mon, 20 Aug 2012 18:30:30 +0200 + +gcc-4.6 (4.6.3-9) unstable; urgency=medium + + * Update to SVN 20120820 (r190530) from the gcc-4_6-branch. + - ARM: Set vector type alignment to 8 bytes. + - Fix PR target/33135 (SH), PR rtl-optimization/53908 (wrong code), + PR middle-end/53433, PR middle-end/38474, PR middle-end/53790, + PR c++/52988 (wrong code), PR fortran/51758. + + [ Aurelien Jarno ] + * Add patches/ada-ppc64.diff to fix GNAT build on ppc64. + * powerpc64: fix non-multilib builds. + + [ Matthias Klose ] + * Update the Linaro support to the 4.6-2012.08 release. + * spu build: Move static libraries to version specific directories. + Closes: #680022. + + [ Thibaut Girka ] + * Fix cross compilers for 64bit architectures when using + DEB_CROSS_NO_BIARCH. + + -- Matthias Klose Mon, 20 Aug 2012 13:13:45 +0200 + +gcc-4.6 (4.6.3-8ubuntu1) quantal; urgency=low + + * Merge with Debian. 
+ + -- Matthias Klose Mon, 25 Jun 2012 14:45:37 +0200 + +gcc-4.6 (4.6.3-8) unstable; urgency=low + + * Update to SVN 20120624 (r188916) from the gcc-4_6-branch. + - Fix PR gcov-profile/53744, PR target/48126, PR target/53621, + PR target/53559, PR target/46261, PR target/52999, PR middle-end/53541, + PR target/53385, PR middle-end/51071, PR target/52407, PR c/52862, + PR fortran/53597, PR fortran/50619, PR fortran/53521, PR fortran/53389, + PR libstdc++/53678. + * Update the Linaro support to the 4.6-2012.06 release. + + -- Matthias Klose Sun, 24 Jun 2012 14:41:39 +0200 + +gcc-4.6 (4.6.3-7) unstable; urgency=low + + * Update to SVN 20120528 (r187930) from the gcc-4_6-branch. + - Fix PR target/53385, PR middle-end/51071, PR target/52407, + PR c/52862, PR fortran/53389. + + -- Matthias Klose Mon, 28 May 2012 16:12:02 +0200 + +gcc-4.6 (4.6.3-6ubuntu1) quantal; urgency=low + + * Merge with Debian. + + -- Matthias Klose Tue, 22 May 2012 07:39:40 +0200 + +gcc-4.6 (4.6.3-6) unstable; urgency=low + + * Update to SVN 20120522 (r187757) from the gcc-4_6-branch. + - Fix PR fortran/52864, PR c/53418, PR target/53416, PR target/46098, + PR target/52999, PR target/53228, PR target/53199, PR fortran/53310. + * Update the arm-dynamic-linker patch as found on the trunk, and + the arm-multilib-defaults patch as proposed for upstream. + * Update the Linaro support to the 4.6-2012.05 release. + + -- Matthias Klose Tue, 22 May 2012 13:24:37 +0800 + +gcc-4.6 (4.6.3-5ubuntu2) quantal; urgency=low + + * Update to SVN 20120502 (r187066) from the gcc-4_6-branch. + - Fix PR fortran/52864. + * Update the arm-dynamic-linker patch as found on the trunk, and + the arm-multilib-defaults patch as proposed for upstream. + + -- Matthias Klose Thu, 03 May 2012 11:31:18 +0200 + +gcc-4.6 (4.6.3-5ubuntu1) quantal; urgency=low + + * Default to armv5t, soft float on armel. + * Build again multilib gnat on armel and armhf. 
+ + -- Matthias Klose Wed, 25 Apr 2012 16:26:46 +0200 + +gcc-4.6 (4.6.3-5) unstable; urgency=medium + + * Update to SVN 20120430 (r186999) from the gcc-4_6-branch. + - Fix PR middle-end/53084, PR lto/48246, PR target/53138. + * Don't build multilib gnat on armel and armhf. + * Don't try to run the libstdc++ testsuite if the C++ frontend isn't built. + * Don't configure with --enable-gnu-unique-object on kfreebsd and hurd. + * Treat wheezy the same as sid in more places (Peter Green). Closes: #670821. + * Fix setting MULTILIB_DEFAULTS for ARM multilib builds. + + -- Matthias Klose Mon, 30 Apr 2012 22:38:54 +0200 + +gcc-4.6 (4.6.3-4) unstable; urgency=low + + [ Matthias Klose ] + * Update to SVN 20120416 (r186492) from the gcc-4_6-branch. + - Fix PR middle-end/52894, PR target/52717, PR target/52775, + PR target/52775. + * Update the Linaro support to the 4.6-2012.04 release. + * Fix PR middle-end/52870, taken from the trunk (Ulrich Weigand). + Linaro only. LP: #968766. + * Fix ICE (regression) in Linaro gcc-4.6 (Ulrich Weigand). + LP: #972648. + * Don't build ARM biarch runtime libraries, now built from the + gcc-4.7 sources. + * Set the ARM hard-float linker path according to the consensus: + http://lists.linaro.org/pipermail/cross-distro/2012-April/000261.html + + [ Samuel Thibault ] + * ada-s-osinte-gnu.adb.diff, ada-s-osinte-gnu.ads.diff, + ada-s-taprop-gnu.adb.diff, gcc_ada_gcc-interface_Makefile.in.diff: + Add ada support for GNU/Hurd, thanks Svante Signell for the patches + and bootstrap! (Closes: #668425). + + -- Matthias Klose Mon, 16 Apr 2012 14:17:30 +0200 + +gcc-4.6 (4.6.3-3ubuntu1) precise; urgency=low + + * Update to SVN 20120414 (r186373) from the gcc-4_6-branch. + - Fix PR middle-end/52894, PR target/52717. + * Update the Linaro support to the 4.6-2012.04 release. + * Fix PR middle-end/52870, taken from the trunk (Ulrich Weigand). + Linaro only. LP: #968766. + * Fix ICE (regression) in Linaro gcc-4.6 (Ulrich Weigand). + LP: #972648. 
+ * Don't build ARM biarch runtime libraries, now built from the + gcc-4.7 sources. + + -- Matthias Klose Thu, 12 Apr 2012 15:30:44 +0200 + +gcc-4.6 (4.6.3-3) unstable; urgency=low + + * Update to SVN 20120406 (r186200) from the gcc-4_6-branch. + - Fix PR c++/52796. + + [ Matthias Klose ] + * Re-add missing dependency on libgcc in gcc-multilib. Closes: #667519. + * Add support for GNU locales for GNU/Hurd (Svante Signell). + Closes: #667662. + * Reenable the spu build on ppc64. Closes: #664617. + * Apply proposed patch for PR52894, stage1 bootstrap failure on hppa + (John David Anglin). Closes: #667969. + + [ Nobuhiro Iwamatsu ] + * Fix cross build targeting sh4. Closes: #663028. + * Enable -mieee by default on sh4. Closes: #665328. + + -- Matthias Klose Sun, 08 Apr 2012 15:22:12 +0200 + +gcc-4.6 (4.6.3-2) unstable; urgency=low + + * Update to SVN 20120403 (r186115) from the gcc-4_6-branch. + - Fix PR target/52698, PR middle-end/51200, PR middle-end/52693, + PR target/52741, PR target/52736, PR regression/52696, + PR middle-end/51737, PR middle-end/52640, PR middle-end/48600, + PR pch/45979, PR rtl-optimization/52528, PR Bug middle-end/50232, + PR target/51871, PR target/50310, PR target/52408, PR target/52425, + PR tree-optimization/50031, PR tree-optimization/50969, + PR fortran/52469, PR fortran/52452, PR libstdc++/52433, + PR boehm-gc/52179, PR target/49461. + + [ Matthias Klose ] + * Merge from gnat-4.6 4.6.3-1: + * debian/control.m4: remove dependencies on ada-compiler which is being + phased out. + * debian/patches/ada-libgnatvsn.diff (gnatvsn.ads): new hunk, revert an + upstream change to Current_Year to preserve the aliversion. + * Include -print-multiarch option in gcc --help output. Closes: #656998. + * Re-add build dependency on doxygen. + * Stop building runtime packages now built by gcc-4.7. + * Stop building gccgo-4.6, predating the Go language version 1. + * Drop the 4.6.2 symlink. + * Always configure with --enable-gnu-unique-object. LP: #949805. 
+ * Fix ARM ABI conformance regression, taken from trunk/Linaro. + + [ Samuel Thibault ] + * Apply hurd-fixes.diff on hurd-any. + * debian/patches/ada-bug564232.diff: Enable on hurd too. + * debian/patches/ada-libgnatprj.diff: Add hurd configuration. + + -- Matthias Klose Wed, 04 Apr 2012 00:10:02 +0200 + +gcc-4.6 (4.6.3-1ubuntu4) precise; urgency=low + + * Re-add build dependency on doxygen. LP: #963777. + * Stop building the Go packages (now built by gccgo-4.7). + * Fix PR middle-end/52870, taken from the trunk (Ulrich Weigand). + Linaro only. LP: #968766. + + -- Matthias Klose Tue, 03 Apr 2012 20:04:25 +0200 + +gcc-4.6 (4.6.3-1ubuntu3) precise; urgency=low + + * Drop the 4.6.2 symlink. + * Always configure with --enable-gnu-unique-object. LP: #949805. + * Fix ARM ABI conformance regression, taken from trunk/Linaro. + + -- Matthias Klose Thu, 08 Mar 2012 23:01:13 +0100 + +gcc-4.6 (4.6.3-1ubuntu2) precise; urgency=low + + * Merge with Debian. + + -- Matthias Klose Thu, 01 Mar 2012 15:42:05 +0100 + +gcc-4.6 (4.6.3-1) unstable; urgency=low + + * GCC 4.6.3 release. + + [ Matthias Klose ] + * Linaro only: + Backport bug fixes (r106870, r106873) from the Linaro branch. LP: #922474. + * Fix PR target/50946, taken from the trunk. Closes: #641849. + + [ Thorsten Glaser ] + * Backport PR rtl-optimization/47612 from the trunk, apply for m68k only. + * Don't set the bootstrap-lean target unconditionally. + + -- Matthias Klose Thu, 01 Mar 2012 15:31:27 +0100 + +gcc-4.6 (4.6.2-16ubuntu1) precise; urgency=low + + * Merge with Debian. + + -- Matthias Klose Thu, 23 Feb 2012 17:44:03 +0100 + +gcc-4.6 (4.6.2-16) unstable; urgency=low + + * Update to SVN 20120223 (r184520) from the gcc-4_6-branch (supposed + to become the 4.6.3 release candidate). + - Fix PR tree-optimization/52286, PR c/52290, PR target/52330, + PR target/52294, PR target/52238, PR libstdc++/52300, + PR libstdc++/52317, PR libstdc++/52309. 
+ + [ Marcin Juszkiewicz ] + * Fix ARM sf/hf multilib dpkg-shlibdeps dependency generation. + + -- Matthias Klose Thu, 23 Feb 2012 19:39:11 +0100 + +gcc-4.6 (4.6.2-15) unstable; urgency=low + + * Update to SVN 20120219 (r184373) from the gcc-4_6-branch. + - Fix PR tree-optimization/46886, PR debug/51950, PR c/52181, + PR middle-end/52230, PR bootstrap/51969, PR c++/52247, PR c/5218, + PR debug/52260, PR target/52199. + * Don't add ARM sf/hf conflicts for cross package builds. LP: #913734. + + -- Matthias Klose Sun, 19 Feb 2012 15:01:53 +0100 + +gcc-4.6 (4.6.2-14ubuntu2) precise; urgency=low + + * Update to SVN 20120215 (r184282) from the gcc-4_6-branch. + - Fix PR tree-optimization/46886, PR debug/51950, PR c/52181, + PR middle-end/52230, PR bootstrap/51969, PR c++/52247, PR c/5218. + * Don't add ARM sf/hf conflicts for cross package builds. LP: #913734. + + -- Matthias Klose Thu, 16 Feb 2012 00:28:55 +0100 + +gcc-4.6 (4.6.2-14ubuntu1) precise; urgency=low + + * Merge with Debian. + + -- Matthias Klose Fri, 10 Feb 2012 19:34:24 +0100 + +gcc-4.6 (4.6.2-14) unstable; urgency=low + + * Update to SVN 20120210 (r184105) from the gcc-4_6-branch. + - Fix PR rtl-optimization/52139, PR rtl-optimization/52060, + PR middle-end/52074, PR target/52129, PR middle-end/48071, + PR target/52006, PR libmudflap/40778, PR rtl-optimization/51767, + PR middle-end/51768, PR middle-end/44777, PR debug/51695, PR c/51360, + PR debug/51517, PR middle-end/52140, PR target/51106, PR c++/51669, + PR driver/48306, PR tree-optimization/49536. + * Fix libstdc++-dev control file for cross builds. + + -- Matthias Klose Fri, 10 Feb 2012 19:11:07 +0100 + +gcc-4.6 (4.6.2-13) unstable; urgency=low + + * Update to SVN 20120208 (r184026) from the gcc-4_6-branch. 
+ - Fix PR middle-end/51994, PR target/40068, PR target/52107, + PR tree-optimization/51118, PR rtl-optimization/51374, PR target/51835, + PR target/50313, PR middle-end/45678, PR ada/46192, PR fortran/52151, + PR fortran/52093, PR fortran/52012, PR fortran/52022, PR fortran/51966, + PR fortran/51948, PR fortran/51913, PR libstdc++/51795, PR libjava/48512. + + * Install libstdc++ -gdb.py file into /usr/lib/debug. + Closes: #652160, #653446. + * Configure --with-system-root, remove trailing slash from system root. + * Strip whitespace from with_libssp definition. Closes: #653255. + * Fix control file generation for cross packages. LP: #913734. + * Update the Linaro support to the 4.6-2012.01-1 release. + + -- Matthias Klose Thu, 09 Feb 2012 01:06:15 +0100 + +gcc-4.6 (4.6.2-12ubuntu1) precise; urgency=low + + * Merge with Debian. + + -- Matthias Klose Fri, 20 Jan 2012 12:10:59 +0100 + +gcc-4.6 (4.6.2-12) unstable; urgency=low + + * Update to SVN 20120120 (r183333) from the gcc-4_6-branch. + - Fix PR middle-end/48660, PR tree-optimization/51315, + PR target/51756 (avr), PR rtl-optimization/38644, PR ada/41929, + PR target/48743 (x86), PR tree-optimization/49642, + PR rtl-optimization/51821, PR tree-optimization/51759, + PR rtl-optimization/51821, PR target/51623, PR c++/51854, PR c++/51868, + PR c++/51344, PR fortran/51800, PR fortran/51904. + + [ Matthias Klose ] + * Update the Linaro support to the 4.6-2012.01-1 release. + + [ Marcin Juszkiewicz ] + * Fix control file generation for ARM multiarch cross builds. + + -- Matthias Klose Fri, 20 Jan 2012 12:05:27 +0100 + +gcc-4.6 (4.6.2-11) unstable; urgency=low + + * Update to SVN 20120104 (r182901) from the gcc-4_6-branch. + - Fix PR tree-optimization/51624. + * gcc-volatile-bitfields.diff: Remove, integrated upstream. + * Replace Fortran 95 with Fortran in package descriptions. 
+ + -- Matthias Klose Thu, 05 Jan 2012 06:04:20 +0100 gcc-4.6 (4.6.2-10ubuntu1) precise; urgency=low diff -Nru gcc-4.6-4.6.2/debian/control gcc-4.6-4.6.4/debian/control --- gcc-4.6-4.6.2/debian/control 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/control 2013-04-14 23:00:34.000000000 +0000 @@ -3,9 +3,19 @@ Priority: optional Maintainer: Ubuntu Core developers XSBC-Original-Maintainer: Debian GCC Maintainers -Uploaders: Matthias Klose , Arthur Loiret -Standards-Version: 3.9.2 -Build-Depends: dpkg-dev (>= 1.14.15), debhelper (>= 5.0.62), g++-multilib [amd64 i386 kfreebsd-amd64 mips mipsel powerpc ppc64 s390 s390x sparc], libc6.1-dev (>= 2.5) [alpha ia64] | libc0.3-dev (>= 2.5) [hurd-i386] | libc0.1-dev (>= 2.5) [kfreebsd-i386 kfreebsd-amd64] | libc6-dev (>= 2.5), libc6-dev-amd64 [i386], libc6-dev-sparc64 [sparc], libc6-dev-s390 [s390x], libc6-dev-s390x [s390], libc6-dev-i386 [amd64], libc6-dev-powerpc [ppc64], libc6-dev-ppc64 [powerpc], libc0.1-dev-i386 [kfreebsd-amd64], lib32gcc1 [amd64 ppc64 kfreebsd-amd64 s390x], lib64gcc1 [i386 powerpc sparc s390], libc6-dev-mips64 [mips mipsel], libc6-dev-mipsn32 [mips mipsel], m4, libtool, autoconf2.64, automake (>= 1:1.11), automake (<< 1:1.12), libunwind7-dev (>= 0.98.5-6) [ia64], libatomic-ops-dev [ia64], zlib1g-dev, gawk, lzma, xz-utils, patchutils, binutils (>= 2.21.1) | binutils-multiarch (>= 2.21.1), binutils-hppa64 (>= 2.21.1) [hppa], gperf (>= 3.0.1), bison (>= 1:2.3), flex, gettext, texinfo (>= 4.3), locales [!knetbsd-i386 !knetbsd-alpha], procps, sharutils, binutils-spu (>= 2.21.1) [powerpc ppc64], newlib-spu (>= 1.16.0) [powerpc ppc64], libcloog-ppl-dev (>= 0.15.9-2~), libmpc-dev, libmpfr-dev (>= 3.0.0-9~), libgmp-dev (>= 2:5.0.1~), libelfg0-dev (>= 0.8.12), dejagnu [!m68k !hurd-i386 !hurd-alpha], autogen, realpath (>= 1.9.12), chrpath, lsb-release, make (>= 3.81), quilt +Uploaders: Matthias Klose +Standards-Version: 3.9.4 +Build-Depends: dpkg-dev (>= 1.14.15), debhelper (>= 5.0.62), g++-multilib 
[amd64 i386 kfreebsd-amd64 mips mipsel powerpc ppc64 s390 s390x sparc], + libc6.1-dev (>= 2.11) [alpha ia64] | libc0.3-dev (>= 2.11) [hurd-i386] | libc0.1-dev (>= 2.11) [kfreebsd-i386 kfreebsd-amd64] | libc6-dev (>= 2.11), libc6-dev-amd64 [i386], libc6-dev-sparc64 [sparc], libc6-dev-s390 [s390x], libc6-dev-s390x [s390], libc6-dev-i386 [amd64], libc6-dev-powerpc [ppc64], libc6-dev-ppc64 [powerpc], libc0.1-dev-i386 [kfreebsd-amd64], lib32gcc1 [amd64 ppc64 kfreebsd-amd64 s390x], lib64gcc1 [i386 powerpc sparc s390], libc6-dev-mips64 [mips mipsel], libc6-dev-mipsn32 [mips mipsel], + m4, libtool, autoconf2.64, automake (>= 1:1.11), automake (<< 1:1.12), + libunwind7-dev (>= 0.98.5-6) [ia64], libatomic-ops-dev [ia64], + zlib1g-dev, gawk, lzma, xz-utils, patchutils, + binutils (>= 2.21.1) | binutils-multiarch (>= 2.21.1), binutils-hppa64 (>= 2.21.1) [hppa], + gperf (>= 3.0.1), bison (>= 1:2.3), flex, gettext, texinfo (>= 4.3), locales, sharutils, + procps, binutils-spu (>= 2.21.1) [powerpc ppc64], newlib-spu (>= 1.16.0) [powerpc ppc64], + libcloog-ppl-dev (>= 0.15.9-2~), libmpc-dev, libmpfr-dev (>= 3.0.0-9~), libgmp-dev (>= 2:5.0.1~), libelfg0-dev (>= 0.8.12), dejagnu [!m68k !hurd-i386 !hurd-alpha], autogen, + realpath (>= 1.9.12), chrpath, lsb-release, make (>= 3.81), quilt +Build-Depends-Indep: doxygen (>= 1.7.2), graphviz (>= 2.2), gsfonts-x11, texlive-latex-base, Build-Conflicts: binutils-gold Homepage: http://gcc.gnu.org/ XS-Vcs-Browser: http://svn.debian.org/viewsvn/gcccvs/branches/sid/gcc-4.6/ @@ -17,124 +27,11 @@ Priority: required Depends: ${misc:Depends} Replaces: ${base:Replaces} -Breaks: gcj-4.6-base (<< 4.6.1-3ubuntu3), gnat-4.6 (<< 4.6.1-4ubuntu3) +Breaks: gcj-4.6-base (<< 4.6.1-4~), gnat-4.6 (<< 4.6.1-5~), dehydra (<= 0.9.hg20110609-2) Description: GCC, the GNU Compiler Collection (base package) This package contains files common to all languages and libraries contained in the GNU Compiler Collection (GCC). 
-Package: libgcc1 -Architecture: any -Section: libs -Priority: required -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Provides: libgcc1-armel [armel], libgcc1-armhf [armhf] -Description: GCC support library - Shared version of the support library, a library of internal subroutines - that GCC uses to overcome shortcomings of particular machines, or - special needs for some languages. - -Package: libgcc1-dbg -Architecture: any -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libgcc1 (= ${gcc:EpochVersion}), ${misc:Depends} -Provides: libgcc1-dbg-armel [armel], libgcc1-dbg-armhf [armhf] -Description: GCC support library (debug symbols) - Debug symbols for the GCC support library. - -Package: libgcc2 -Architecture: m68k -Section: libs -Priority: required -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Description: GCC support library - Shared version of the support library, a library of internal subroutines - that GCC uses to overcome shortcomings of particular machines, or - special needs for some languages. - -Package: libgcc2-dbg -Architecture: m68k -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libgcc2 (= ${gcc:Version}), ${misc:Depends} -Description: GCC support library (debug symbols) - Debug symbols for the GCC support library. - -Package: libgcc4 -Architecture: hppa -Section: libs -Priority: required -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Description: GCC support library - Shared version of the support library, a library of internal subroutines - that GCC uses to overcome shortcomings of particular machines, or - special needs for some languages. - -Package: libgcc4-dbg -Architecture: hppa -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libgcc4 (= ${gcc:Version}), ${misc:Depends} -Description: GCC support library (debug symbols) - Debug symbols for the GCC support library. 
- -Package: lib64gcc1 -Architecture: i386 powerpc sparc s390 mips mipsel -Section: libs -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${misc:Depends} -Conflicts: libgcc1 (<= 1:3.3-0pre9) -Description: GCC support library (64bit) - Shared version of the support library, a library of internal subroutines - that GCC uses to overcome shortcomings of particular machines, or - special needs for some languages. - -Package: lib64gcc1-dbg -Architecture: i386 powerpc sparc s390 mips mipsel -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib64gcc1 (= ${gcc:EpochVersion}), ${misc:Depends} -Description: GCC support library (debug symbols) - Debug symbols for the GCC support library. - -Package: lib32gcc1 -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Section: libs -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${misc:Depends} -Conflicts: ${confl:lib32} -Description: GCC support library (32 bit Version) - Shared version of the support library, a library of internal subroutines - that GCC uses to overcome shortcomings of particular machines, or - special needs for some languages. - -Package: lib32gcc1-dbg -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib32gcc1 (= ${gcc:EpochVersion}), ${misc:Depends} -Description: GCC support library (debug symbols) - Debug symbols for the GCC support library. - -Package: libn32gcc1 -Architecture: mips mipsel -Section: libs -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${misc:Depends} -Conflicts: libgcc1 (<= 1:3.3-0pre9) -Description: GCC support library (n32) - Shared version of the support library, a library of internal subroutines - that GCC uses to overcome shortcomings of particular machines, or - special needs for some languages. 
- -Package: libn32gcc1-dbg -Architecture: mips mipsel -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libn32gcc1 (= ${gcc:EpochVersion}), ${misc:Depends} -Description: GCC support library (debug symbols) - Debug symbols for the GCC support library. - Package: gcc-4.6 Architecture: any Section: devel @@ -262,84 +159,6 @@ On architectures with multilib support, the package contains files and dependencies for the non-default multilib architecture(s). -Package: libmudflap0 -Architecture: any -Provides: libmudflap0-armel [armel], libmudflap0-armhf [armhf] -Section: libs -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Description: GCC mudflap shared support libraries - The libmudflap libraries are used by GCC for instrumenting pointer and array - dereferencing operations. - -Package: libmudflap0-dbg -Architecture: any -Provides: libmudflap0-dbg-armel [armel], libmudflap0-dbg-armhf [armhf] -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libmudflap0 (= ${gcc:Version}), ${misc:Depends} -Description: GCC mudflap shared support libraries (debug symbols) - The libmudflap libraries are used by GCC for instrumenting pointer and array - dereferencing operations. - -Package: lib32mudflap0 -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Section: libs -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Replaces: libmudflap0 (<< 4.1) -Conflicts: ${confl:lib32} -Description: GCC mudflap shared support libraries (32bit) - The libmudflap libraries are used by GCC for instrumenting pointer and array - dereferencing operations. 
- -Package: lib32mudflap0-dbg -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib32mudflap0 (= ${gcc:Version}), ${misc:Depends} -Description: GCC mudflap shared support libraries (32 bit debug symbols) - The libmudflap libraries are used by GCC for instrumenting pointer and array - dereferencing operations. - -Package: lib64mudflap0 -Architecture: i386 powerpc sparc s390 mips mipsel -Section: libs -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Replaces: libmudflap0 (<< 4.1) -Description: GCC mudflap shared support libraries (64bit) - The libmudflap libraries are used by GCC for instrumenting pointer and array - dereferencing operations. - -Package: lib64mudflap0-dbg -Architecture: i386 powerpc sparc s390 mips mipsel -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib64mudflap0 (= ${gcc:Version}), ${misc:Depends} -Description: GCC mudflap shared support libraries (64 bit debug symbols) - The libmudflap libraries are used by GCC for instrumenting pointer and array - dereferencing operations. - -Package: libn32mudflap0 -Architecture: mips mipsel -Section: libs -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Replaces: libmudflap0 (<< 4.1) -Description: GCC mudflap shared support libraries (n32) - The libmudflap libraries are used by GCC for instrumenting pointer and array - dereferencing operations. - -Package: libn32mudflap0-dbg -Architecture: mips mipsel -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libn32mudflap0 (= ${gcc:Version}), ${misc:Depends} -Description: GCC mudflap shared support libraries (n32 debug symbols) - The libmudflap libraries are used by GCC for instrumenting pointer and array - dereferencing operations. 
- Package: libmudflap0-4.6-dev Architecture: any Section: libdevel @@ -353,157 +172,6 @@ . This package contains the headers and the static libraries. -Package: libgomp1 -Section: libs -Architecture: any -Provides: libgomp1-armel [armel], libgomp1-armhf [armhf] -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Description: GCC OpenMP (GOMP) support library - GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers - in the GNU Compiler Collection. - -Package: libgomp1-dbg -Architecture: any -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libgomp1 (= ${gcc:Version}), ${misc:Depends} -Provides: libgomp1-dbg-armel [armel], libgomp1-dbg-armhf [armhf] -Description: GCC OpenMP (GOMP) support library (debug symbols) - GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers - in the GNU Compiler Collection. - -Package: lib32gomp1 -Section: libs -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: ${confl:lib32} -Description: GCC OpenMP (GOMP) support library (32bit) - GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers - in the GNU Compiler Collection. - -Package: lib32gomp1-dbg -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib32gomp1 (= ${gcc:Version}), ${misc:Depends} -Description: GCC OpenMP (GOMP) support library (32 bit debug symbols) - GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers - in the GNU Compiler Collection. 
- -Package: lib64gomp1 -Section: libs -Architecture: i386 powerpc sparc s390 mips mipsel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Description: GCC OpenMP (GOMP) support library (64bit) - GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers - in the GNU Compiler Collection. - -Package: lib64gomp1-dbg -Architecture: i386 powerpc sparc s390 mips mipsel -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib64gomp1 (= ${gcc:Version}), ${misc:Depends} -Description: GCC OpenMP (GOMP) support library (64bit debug symbols) - GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers - in the GNU Compiler Collection. - -Package: libn32gomp1 -Section: libs -Architecture: mips mipsel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Description: GCC OpenMP (GOMP) support library (n32) - GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers - in the GNU Compiler Collection. - -Package: libn32gomp1-dbg -Architecture: mips mipsel -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libn32gomp1 (= ${gcc:Version}), ${misc:Depends} -Description: GCC OpenMP (GOMP) support library (n32 debug symbols) - GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers - -Package: libquadmath0 -Section: libs -Architecture: any -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Description: GCC Quad-Precision Math Library - A library, which provides quad-precision mathematical functions on targets - supporting the __float128 datatype. The library is used to provide on such - targets the REAL(16) type in the GNU Fortran compiler. 
- -Package: libquadmath0-dbg -Architecture: any -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libquadmath0 (= ${gcc:Version}), ${misc:Depends} -Description: GCC Quad-Precision Math Library (debug symbols) - A library, which provides quad-precision mathematical functions on targets - supporting the __float128 datatype. - -Package: lib32quadmath0 -Section: libs -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: ${confl:lib32} -Description: GCC Quad-Precision Math Library (32bit) - A library, which provides quad-precision mathematical functions on targets - supporting the __float128 datatype. The library is used to provide on such - targets the REAL(16) type in the GNU Fortran compiler. - -Package: lib32quadmath0-dbg -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib32quadmath0 (= ${gcc:Version}), ${misc:Depends} -Description: GCC Quad-Precision Math Library (32 bit debug symbols) - A library, which provides quad-precision mathematical functions on targets - supporting the __float128 datatype. - -Package: lib64quadmath0 -Section: libs -Architecture: i386 powerpc sparc s390 mips mipsel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Description: GCC Quad-Precision Math Library (64bit) - A library, which provides quad-precision mathematical functions on targets - supporting the __float128 datatype. The library is used to provide on such - targets the REAL(16) type in the GNU Fortran compiler. 
- -Package: lib64quadmath0-dbg -Architecture: i386 powerpc sparc s390 mips mipsel -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib64quadmath0 (= ${gcc:Version}), ${misc:Depends} -Description: GCC Quad-Precision Math Library (64bit debug symbols) - A library, which provides quad-precision mathematical functions on targets - supporting the __float128 datatype. - -Package: libn32quadmath0 -Section: libs -Architecture: mips mipsel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Description: GCC Quad-Precision Math Library (n32) - A library, which provides quad-precision mathematical functions on targets - supporting the __float128 datatype. The library is used to provide on such - targets the REAL(16) type in the GNU Fortran compiler. - -Package: libn32quadmath0-dbg -Architecture: mips mipsel -Section: debug -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libn32quadmath0 (= ${gcc:Version}), ${misc:Depends} -Description: GCC Quad-Precision Math Library (n32 debug symbols) - A library, which provides quad-precision mathematical functions on targets - supporting the __float128 datatype. - Package: gobjc++-4.6 Architecture: any Priority: optional @@ -649,238 +317,6 @@ Description: Documentation for the GNU Fortran compiler (gfortran) Documentation for the GNU Fortran compiler in info format. -Package: libgfortran3 -Section: libs -Architecture: any -Provides: libgfortran3-armel [armel], libgfortran3-armhf [armhf] -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Description: Runtime library for GNU Fortran applications - Library needed for GNU Fortran applications linked against the - shared library. 
- -Package: libgfortran3-dbg -Section: debug -Architecture: any -Provides: libgfortran3-dbg-armel [armel], libgfortran3-dbg-armhf [armhf] -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libgfortran3 (= ${gcc:Version}), ${misc:Depends} -Description: Runtime library for GNU Fortran applications (debug symbols) - Library needed for GNU Fortran applications linked against the - shared library. - -Package: lib64gfortran3 -Section: libs -Architecture: i386 powerpc sparc s390 mips mipsel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Description: Runtime library for GNU Fortran applications (64bit) - Library needed for GNU Fortran applications linked against the - shared library. - -Package: lib64gfortran3-dbg -Section: debug -Architecture: i386 powerpc sparc s390 mips mipsel -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib64gfortran3 (= ${gcc:Version}), ${misc:Depends} -Description: Runtime library for GNU Fortran applications (64bit debug symbols) - Library needed for GNU Fortran applications linked against the - shared library. - -Package: lib32gfortran3 -Section: libs -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: ${confl:lib32} -Description: Runtime library for GNU Fortran applications (32bit) - Library needed for GNU Fortran applications linked against the - shared library. - -Package: lib32gfortran3-dbg -Section: debug -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib32gfortran3 (= ${gcc:Version}), ${misc:Depends} -Description: Runtime library for GNU Fortran applications (32 bit debug symbols) - Library needed for GNU Fortran applications linked against the - shared library. 
- -Package: libn32gfortran3 -Section: libs -Architecture: mips mipsel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Description: Runtime library for GNU Fortran applications (n32) - Library needed for GNU Fortran applications linked against the - shared library. - -Package: libn32gfortran3-dbg -Section: debug -Architecture: mips mipsel -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libn32gfortran3 (= ${gcc:Version}), ${misc:Depends} -Description: Runtime library for GNU Fortran applications (n32 debug symbols) - Library needed for GNU Fortran applications linked against the - shared library. - -Package: gccgo-4.6 -Architecture: any -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), gcc-4.6 (= ${gcc:Version}), libgo0 (>= ${gcc:Version}), ${dep:libcdev}, ${shlibs:Depends}, ${misc:Depends} -Provides: go-compiler -Suggests: ${go:multilib}, gccgo-4.6-doc, libgo0-dbg -Description: GNU Go compiler - This is the GNU Go compiler, which compiles Go on platforms supported - by the gcc compiler. It uses the gcc backend to generate optimized code. - -Package: gccgo-4.6-multilib -Architecture: amd64 i386 kfreebsd-amd64 mips mipsel powerpc ppc64 s390 s390x sparc -Section: devel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), gccgo-4.6 (= ${gcc:Version}), gcc-4.6-multilib (= ${gcc:Version}), ${dep:libgobiarch}, ${shlibs:Depends}, ${misc:Depends} -Suggests: ${dep:libgobiarchdbg} -Description: GNU Go compiler (multilib files) - This is the GNU Go compiler, which compiles Go on platforms supported - by the gcc compiler. - . - On architectures with multilib support, the package contains files - and dependencies for the non-default multilib architecture(s). 
- -Package: gccgo-4.6-doc -Architecture: all -Section: doc -Priority: optional -Depends: gcc-4.6-base (>= ${gcc:SoftVersion}), dpkg (>= 1.15.4) | install-info, ${misc:Depends} -Description: Documentation for the GNU Go compiler (gccgo) - Documentation for the GNU Go compiler in info format. - -Package: libgo0 -Section: libs -Architecture: any -Provides: libgo0-armel [armel], libgo0-armhf [armhf] -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Description: Runtime library for GNU Go applications - Library needed for GNU Go applications linked against the - shared library. - -Package: libgo0-dbg -Section: debug -Architecture: any -Provides: libgo0-dbg-armel [armel], libgo0-dbg-armhf [armhf] -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libgo0 (= ${gcc:Version}), ${misc:Depends} -Description: Runtime library for GNU Go applications (debug symbols) - Library needed for GNU Go applications linked against the - shared library. - -Package: lib64go0 -Section: libs -Architecture: i386 powerpc sparc s390 mips mipsel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Description: Runtime library for GNU Go applications (64bit) - Library needed for GNU Go applications linked against the - shared library. - -Package: lib64go0-dbg -Section: debug -Architecture: i386 powerpc sparc s390 mips mipsel -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib64go0 (= ${gcc:Version}), ${misc:Depends} -Description: Runtime library for GNU Go applications (64bit debug symbols) - Library needed for GNU Go applications linked against the - shared library. 
- -Package: lib32go0 -Section: libs -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: ${confl:lib32} -Description: Runtime library for GNU Go applications (32bit) - Library needed for GNU Go applications linked against the - shared library. - -Package: lib32go0-dbg -Section: debug -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib32go0 (= ${gcc:Version}), ${misc:Depends} -Description: Runtime library for GNU Go applications (32 bit debug symbols) - Library needed for GNU Go applications linked against the - shared library. - -Package: libn32go0 -Section: libs -Architecture: mips mipsel -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Description: Runtime library for GNU Go applications (n32) - Library needed for GNU Go applications linked against the - shared library. - -Package: libn32go0-dbg -Section: debug -Architecture: mips mipsel -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), libn32go0 (= ${gcc:Version}), ${misc:Depends} -Description: Runtime library for GNU Go applications (n32 debug symbols) - Library needed for GNU Go applications linked against the - shared library. - -Package: libstdc++6 -Architecture: any -Section: libs -Priority: required -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Provides: libstdc++6-armel [armel], libstdc++6-armhf [armhf] -Conflicts: scim (<< 1.4.2-1) -Description: GNU Standard C++ Library v3 - This package contains an additional runtime library for C++ programs - built with the GNU compiler. - . - libstdc++-v3 is a complete rewrite from the previous libstdc++-v2, which - was included up to g++-2.95. The first version of libstdc++-v3 appeared - in g++-3.0. 
- -Package: lib32stdc++6 -Architecture: amd64 ppc64 kfreebsd-amd64 s390x -Section: libs -Priority: extra -Depends: gcc-4.6-base (= ${gcc:Version}), lib32gcc1, ${shlibs:Depends}, ${misc:Depends} -Conflicts: ${confl:lib32} -Description: GNU Standard C++ Library v3 (32 bit Version) - This package contains an additional runtime library for C++ programs - built with the GNU compiler. - -Package: lib64stdc++6 -Architecture: i386 powerpc sparc s390 mips mipsel -Section: libs -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, lib64gcc1, ${misc:Depends} -Description: GNU Standard C++ Library v3 (64bit) - This package contains an additional runtime library for C++ programs - built with the GNU compiler. - . - libstdc++-v3 is a complete rewrite from the previous libstdc++-v2, which - was included up to g++-2.95. The first version of libstdc++-v3 appeared - in g++-3.0. - -Package: libn32stdc++6 -Architecture: mips mipsel -Section: libs -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), ${shlibs:Depends}, libn32gcc1, ${misc:Depends} -Description: GNU Standard C++ Library v3 (n32) - This package contains an additional runtime library for C++ programs - built with the GNU compiler. - . - libstdc++-v3 is a complete rewrite from the previous libstdc++-v2, which - was included up to g++-2.95. The first version of libstdc++-v3 appeared - in g++-3.0. - Package: libstdc++6-4.6-dev Architecture: any Section: libdevel @@ -973,20 +409,6 @@ These are versions of basic static libraries such as libgcc.a compiled with the -msoft-float option, for CPUs without a floating-point unit. -Package: fixincludes -Architecture: any -Priority: optional -Depends: gcc-4.6-base (= ${gcc:Version}), gcc-4.6 (= ${gcc:Version}), ${shlibs:Depends}, ${misc:Depends} -Description: Fix non-ANSI header files - FixIncludes was created to fix non-ANSI system header files. Many - system manufacturers supply proprietary headers that are not ANSI compliant. 
- The GNU compilers cannot compile non-ANSI headers. Consequently, the - FixIncludes shell script was written to fix the header files. - . - Not all packages with header files are installed on the system, when the - package is built, so we make fixincludes available at build time of other - packages, such that checking tools like lintian can make use of it. - Package: gcc-4.6-doc Architecture: all Section: doc diff -Nru gcc-4.6-4.6.2/debian/control.m4 gcc-4.6-4.6.4/debian/control.m4 --- gcc-4.6-4.6.2/debian/control.m4 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/control.m4 2013-04-14 23:00:34.000000000 +0000 @@ -38,19 +38,34 @@ ifelse(regexp(SRCNAME, `gnat'),0,`dnl Uploaders: Ludovic Brenta ', regexp(SRCNAME, `gdc'),0,`dnl -Uploaders: Iain Buclaw , Arthur Loiret +Uploaders: Iain Buclaw ', `dnl -Uploaders: Matthias Klose , Arthur Loiret +Uploaders: Matthias Klose ')dnl SRCNAME -Standards-Version: 3.9.2 +Standards-Version: 3.9.4 ifdef(`TARGET',`dnl cross -Build-Depends: DPKG_BUILD_DEP debhelper (>= 5.0.62), LIBC_BUILD_DEP, LIBC_BIARCH_BUILD_DEP LIBUNWIND_BUILD_DEP LIBATOMIC_OPS_BUILD_DEP AUTOGEN_BUILD_DEP AUTO_BUILD_DEP SOURCE_BUILD_DEP CROSS_BUILD_DEP CLOOG_BUILD_DEP MPC_BUILD_DEP MPFR_BUILD_DEP GMP_BUILD_DEP ELF_BUILD_DEP, zlib1g-dev, gawk, lzma, xz-utils, patchutils, BINUTILS_BUILD_DEP, bison (>= 1:2.3), flex, realpath (>= 1.9.12), lsb-release, make (>= 3.81), quilt +Build-Depends: DPKG_BUILD_DEP debhelper (>= 5.0.62), + LIBC_BUILD_DEP, LIBC_BIARCH_BUILD_DEP + LIBUNWIND_BUILD_DEP LIBATOMIC_OPS_BUILD_DEP + AUTOGEN_BUILD_DEP AUTO_BUILD_DEP + SOURCE_BUILD_DEP CROSS_BUILD_DEP + CLOOG_BUILD_DEP MPC_BUILD_DEP MPFR_BUILD_DEP GMP_BUILD_DEP + ELF_BUILD_DEP, zlib1g-dev, gawk, lzma, xz-utils, patchutils, + BINUTILS_BUILD_DEP, + bison (>= 1:2.3), flex, realpath (>= 1.9.12), lsb-release, make (>= 3.81), quilt ',`dnl native -Build-Depends: DPKG_BUILD_DEP debhelper (>= 5.0.62), GCC_MULTILIB_BUILD_DEP LIBC_BUILD_DEP, LIBC_BIARCH_BUILD_DEP AUTO_BUILD_DEP 
AUTOGEN_BUILD_DEP libunwind7-dev (>= 0.98.5-6) [ia64], libatomic-ops-dev [ia64], zlib1g-dev, gawk, lzma, xz-utils, patchutils, BINUTILS_BUILD_DEP, binutils-hppa64 (>= BINUTILSV) [hppa], gperf (>= 3.0.1), bison (>= 1:2.3), flex, gettext, texinfo (>= 4.3), FORTRAN_BUILD_DEP locales [locale_no_archs], procps, sharutils, JAVA_BUILD_DEP GNAT_BUILD_DEP GDC_BUILD_DEP SPU_BUILD_DEP CLOOG_BUILD_DEP MPC_BUILD_DEP MPFR_BUILD_DEP GMP_BUILD_DEP ELF_BUILD_DEP CHECK_BUILD_DEP realpath (>= 1.9.12), chrpath, lsb-release, make (>= 3.81), quilt -ifelse(regexp(SRCNAME, `gcc-snapshot'),0,`dnl +Build-Depends: DPKG_BUILD_DEP debhelper (>= 5.0.62), GCC_MULTILIB_BUILD_DEP + LIBC_BUILD_DEP, LIBC_BIARCH_BUILD_DEP + AUTO_BUILD_DEP AUTOGEN_BUILD_DEP + libunwind7-dev (>= 0.98.5-6) [ia64], libatomic-ops-dev [ia64], + zlib1g-dev, gawk, lzma, xz-utils, patchutils, + BINUTILS_BUILD_DEP, binutils-hppa64 (>= BINUTILSV) [hppa], + gperf (>= 3.0.1), bison (>= 1:2.3), flex, gettext, texinfo (>= 4.3), locales, sharutils, + procps, FORTRAN_BUILD_DEP JAVA_BUILD_DEP GNAT_BUILD_DEP GDC_BUILD_DEP SPU_BUILD_DEP + CLOOG_BUILD_DEP MPC_BUILD_DEP MPFR_BUILD_DEP GMP_BUILD_DEP ELF_BUILD_DEP CHECK_BUILD_DEP + realpath (>= 1.9.12), chrpath, lsb-release, make (>= 3.81), quilt Build-Depends-Indep: LIBSTDCXX_BUILD_INDEP JAVA_BUILD_INDEP ')dnl -')dnl Build-Conflicts: binutils-gold ifelse(regexp(SRCNAME, `gnat'),0,`dnl Homepage: http://gcc.gnu.org/ @@ -70,7 +85,7 @@ Depends: binutils`'TS (>= ${binutils:Version}), ${dep:libcbiarchdev}, ${dep:libcdev}, ${dep:libunwinddev}, ${snap:depends}, ${shlibs:Depends}, ${dep:ecj}, python, ${misc:Depends} Recommends: ${snap:recommends} Suggests: ${dep:gold} -Provides: c++-compiler`'TS`'ifdef(`TARGET)',`',`, c++abi2-dev') +Provides: c++-compiler`'TS`'ifdef(`TARGET',`',`, c++abi2-dev') Description: A SNAPSHOT of the GNU Compiler Collection This package contains a recent development SNAPSHOT of all files contained in the GNU Compiler Collection (GCC). 
@@ -107,7 +122,7 @@ Priority: PRI(required) Depends: ${misc:Depends} Replaces: ${base:Replaces} -Breaks: gcj-4.6-base (<< 4.6.1-3ubuntu3), gnat-4.6 (<< 4.6.1-4ubuntu3) +Breaks: gcj-4.6-base (<< 4.6.1-4~), gnat-4.6 (<< 4.6.1-5~), dehydra (<= 0.9.hg20110609-2) Description: GCC, the GNU Compiler Collection (base package) This package contains files common to all languages and libraries contained in the GNU Compiler Collection (GCC). @@ -167,8 +182,8 @@ Section: ifdef(`TARGET',`devel',`libs') Priority: ifdef(`TARGET',`extra',required) Depends: BASEDEP, ${shlibs:Depends}, ${misc:Depends} -ifdef(`TARGET',`Provides: libgcc1-TARGET-dcv1 -',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`Provides: libgcc1-TARGET-dcv1', +ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support Breaks: ${multiarch:breaks} ')`Provides: libgcc1-armel [armel], libgcc1-armhf [armhf]') @@ -187,8 +202,8 @@ Section: debug Priority: extra Depends: BASEDEP, libgcc1`'LS (= ${gcc:EpochVersion}), ${misc:Depends} -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same -')`Provides: libgcc1-dbg-armel [armel], libgcc1-dbg-armhf [armhf]') +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same')) +ifdef(`TARGET',`dnl',`Provides: libgcc1-dbg-armel [armel], libgcc1-dbg-armhf [armhf]') Description: GCC support library (debug symbols)`'ifdef(`TARGET)',` (TARGET)', `') Debug symbols for the GCC support library. ifdef(`TARGET', `dnl @@ -374,7 +389,7 @@ Section: debug Priority: extra Depends: BASEDEP, libhfgcc1`'LS (= ${gcc:EpochVersion}), ${misc:Depends} -Conflicts: libgcc1-dbg-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgcc1-dbg-armhf [biarchhf_archs]') Description: GCC support library (debug symbols)`'ifdef(`TARGET)',` (TARGET)', `') Debug symbols for the GCC support library. 
ifdef(`TARGET', `dnl @@ -408,7 +423,7 @@ Section: debug Priority: extra Depends: BASEDEP, libsfgcc1`'LS (= ${gcc:EpochVersion}), ${misc:Depends} -Conflicts: libgcc1-dbg-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgcc1-dbg-armel [biarchsf_archs]') Description: GCC support library (debug symbols)`'ifdef(`TARGET)',` (TARGET)', `') Debug symbols for the GCC support library. ifdef(`TARGET', `dnl @@ -650,8 +665,9 @@ ifenabled(`libmudf',` Package: libmudflap`'MF_SO`'LS Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support +Breaks: ${multiarch:breaks} ')`Provides: libmudflap'MF_SO`-armel [armel], libmudflap'MF_SO`-armhf [armhf]') Section: ifdef(`TARGET',`devel',`libs') Priority: ifdef(`TARGET',`extra',`PRI(optional)') @@ -662,7 +678,7 @@ Package: libmudflap`'MF_SO-dbg`'LS Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same ')`Provides: libmudflap'MF_SO`-dbg-armel [armel], libmudflap'MF_SO`-dbg-armhf [armhf]') Section: debug Priority: extra @@ -734,7 +750,7 @@ Architecture: ifdef(`TARGET',`all',`biarchhf_archs') Section: ifdef(`TARGET',`devel',`libs') Priority: ifdef(`TARGET',`extra',`PRI(optional)') -Conflicts: libmudflap`'MF_SO`'-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libmudflap`'MF_SO`'-armhf [biarchhf_archs]') Depends: BASEDEP, ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} Description: GCC mudflap shared support libraries (hard float) The libmudflap libraries are used by GCC for instrumenting pointer and array @@ -745,7 +761,7 @@ Section: debug Priority: extra Depends: BASEDEP, libhfmudflap`'MF_SO`'LS (= ${gcc:Version}), ${misc:Depends} -Conflicts: libmudflap`'MF_SO`'-dbg-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libmudflap`'MF_SO`'-dbg-armhf [biarchhf_archs]') Description: GCC 
mudflap shared support libraries (hard float debug symbols) The libmudflap libraries are used by GCC for instrumenting pointer and array dereferencing operations. @@ -757,7 +773,7 @@ Section: ifdef(`TARGET',`devel',`libs') Priority: ifdef(`TARGET',`extra',`PRI(optional)') Depends: BASEDEP, ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: libmudflap`'MF_SO`'-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libmudflap`'MF_SO`'-armel [biarchsf_archs]') Description: GCC mudflap shared support libraries (soft float) The libmudflap libraries are used by GCC for instrumenting pointer and array dereferencing operations. @@ -767,7 +783,7 @@ Section: debug Priority: extra Depends: BASEDEP, libsfmudflap`'MF_SO`'LS (= ${gcc:Version}), ${misc:Depends} -Conflicts: libmudflap`'MF_SO`'-dbg-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libmudflap`'MF_SO`'-dbg-armel [biarchsf_archs]') Description: GCC mudflap shared support libraries (soft float debug symbols) The libmudflap libraries are used by GCC for instrumenting pointer and array dereferencing operations. 
@@ -863,8 +879,9 @@ Package: libgomp`'GOMP_SO`'LS Section: ifdef(`TARGET',`devel',`libs') Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support +Breaks: ${multiarch:breaks} ')`Provides: libgomp'GOMP_SO`-armel [armel], libgomp'GOMP_SO`-armhf [armhf]') Priority: ifdef(`TARGET',`extra',`PRI(optional)') Depends: BASEDEP, ${shlibs:Depends}, ${misc:Depends} @@ -877,7 +894,7 @@ Section: debug Priority: extra Depends: BASEDEP, libgomp`'GOMP_SO`'LS (= ${gcc:Version}), ${misc:Depends} -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same ')`Provides: libgomp'GOMP_SO`-dbg-armel [armel], libgomp'GOMP_SO`-dbg-armhf [armhf]') Description: GCC OpenMP (GOMP) support library (debug symbols) GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers @@ -943,7 +960,7 @@ Architecture: ifdef(`TARGET',`all',`biarchhf_archs') Priority: ifdef(`TARGET',`extra',`PRI(optional)') Depends: BASEDEP, ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: libgomp'GOMP_SO`-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgomp'GOMP_SO`-armhf [biarchhf_archs]') Description: GCC OpenMP (GOMP) support library (hard float ABI) GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers in the GNU Compiler Collection. 
@@ -953,7 +970,7 @@ Section: debug Priority: extra Depends: BASEDEP, libhfgomp`'GOMP_SO`'LS (= ${gcc:Version}), ${misc:Depends} -Conflicts: libgomp'GOMP_SO`-dbg-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgomp'GOMP_SO`-dbg-armhf [biarchhf_archs]') Description: GCC OpenMP (GOMP) support library (hard float ABI debug symbols) GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers ')`'dnl libhfgomp @@ -964,7 +981,7 @@ Architecture: ifdef(`TARGET',`all',`biarchsf_archs') Priority: ifdef(`TARGET',`extra',`PRI(optional)') Depends: BASEDEP, ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: libgomp'GOMP_SO`-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgomp'GOMP_SO`-armel [biarchsf_archs]') Description: GCC OpenMP (GOMP) support library (soft float ABI) GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers in the GNU Compiler Collection. @@ -974,7 +991,7 @@ Section: debug Priority: extra Depends: BASEDEP, libsfgomp`'GOMP_SO`'LS (= ${gcc:Version}), ${misc:Depends} -Conflicts: libgomp'GOMP_SO`-dbg-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgomp'GOMP_SO`-dbg-armel [biarchsf_archs]') Description: GCC OpenMP (GOMP) support library (soft float ABI debug symbols) GOMP is an implementation of OpenMP for the C, C++, and Fortran compilers ')`'dnl libsfgomp @@ -998,7 +1015,7 @@ Package: libquadmath`'QMATH_SO`'LS Section: ifdef(`TARGET',`devel',`libs') Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support '))`'dnl Priority: ifdef(`TARGET',`extra',`PRI(optional)') @@ -1013,7 +1030,7 @@ Section: debug Priority: extra Depends: BASEDEP, libquadmath`'QMATH_SO`'LS (= ${gcc:Version}), ${misc:Depends} -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same '))`'dnl Description: GCC Quad-Precision Math 
Library (debug symbols) A library, which provides quad-precision mathematical functions on targets @@ -1182,9 +1199,10 @@ Package: libobjc`'OBJC_SO`'LS Section: ifdef(`TARGET',`devel',`libs') Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support -')`Provides: libobjc'OBJC_SO`-armel [armel], libobjc'OBJC_SO`-armhf [armhf]') +ifelse(OBJC_SO,`2',`Breaks: ${multiarch:breaks} +',`')')`Provides: libobjc'OBJC_SO`-armel [armel], libobjc'OBJC_SO`-armhf [armhf]') Priority: ifdef(`TARGET',`extra',`PRI(optional)') Depends: BASEDEP, ${shlibs:Depends}, ${misc:Depends} Description: Runtime library for GNU Objective-C applications @@ -1193,7 +1211,7 @@ Package: libobjc`'OBJC_SO-dbg`'LS Section: debug Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same ')`Provides: libobjc'OBJC_SO`-dbg-armel [armel], libobjc'OBJC_SO`-dbg-armhf [armhf]') Priority: extra Depends: BASEDEP, libobjc`'OBJC_SO`'LS (= ${gcc:Version}), libgcc`'GCC_SO-dbg`'LS, ${misc:Depends} @@ -1262,7 +1280,7 @@ Architecture: ifdef(`TARGET',`all',`biarchhf_archs') Priority: ifdef(`TARGET',`extra',`PRI(optional)') Depends: BASEDEP, ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: libobjc'OBJC_SO`-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libobjc'OBJC_SO`-armhf [biarchhf_archs]') Description: Runtime library for GNU Objective-C applications (hard float ABI) Library needed for GNU ObjC applications linked against the shared library. 
@@ -1271,7 +1289,7 @@ Architecture: ifdef(`TARGET',`all',`biarchhf_archs') Priority: extra Depends: BASEDEP, libhfobjc`'OBJC_SO`'LS (= ${gcc:Version}), libhfgcc`'GCC_SO-dbg`'LS, ${misc:Depends} -Conflicts: libobjc'OBJC_SO`-dbg-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libobjc'OBJC_SO`-dbg-armhf [biarchhf_archs]') Description: Runtime library for GNU Objective-C applications (hard float ABI debug symbols) Library needed for GNU ObjC applications linked against the shared library. ')`'dnl libhfobjc @@ -1282,7 +1300,7 @@ Architecture: ifdef(`TARGET',`all',`biarchsf_archs') Priority: ifdef(`TARGET',`extra',`PRI(optional)') Depends: BASEDEP, ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: libobjc'OBJC_SO`-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libobjc'OBJC_SO`-armel [biarchsf_archs]') Description: Runtime library for GNU Objective-C applications (soft float ABI) Library needed for GNU ObjC applications linked against the shared library. @@ -1291,7 +1309,7 @@ Architecture: ifdef(`TARGET',`all',`biarchsf_archs') Priority: extra Depends: BASEDEP, libsfobjc`'OBJC_SO`'LS (= ${gcc:Version}), libsfgcc`'GCC_SO-dbg`'LS, ${misc:Depends} -Conflicts: libobjc'OBJC_SO`-dbg-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libobjc'OBJC_SO`-dbg-armel [biarchsf_archs]') Description: Runtime library for GNU Objective-C applications (soft float ABI debug symbols) Library needed for GNU ObjC applications linked against the shared library. 
')`'dnl libsfobjc @@ -1353,8 +1371,9 @@ Package: libgfortran`'FORTRAN_SO`'LS Section: ifdef(`TARGET',`devel',`libs') Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support +Breaks: ${multiarch:breaks} ')`Provides: libgfortran'FORTRAN_SO`-armel [armel], libgfortran'FORTRAN_SO`-armhf [armhf]') Priority: ifdef(`TARGET',`extra',PRI(optional)) Depends: BASEDEP, ${shlibs:Depends}, ${misc:Depends} @@ -1365,7 +1384,7 @@ Package: libgfortran`'FORTRAN_SO-dbg`'LS Section: debug Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same ')`Provides: libgfortran'FORTRAN_SO`-dbg-armel [armel], libgfortran'FORTRAN_SO`-dbg-armhf [armhf]') Priority: extra Depends: BASEDEP, libgfortran`'FORTRAN_SO`'LS (= ${gcc:Version}), ${misc:Depends} @@ -1441,7 +1460,7 @@ Architecture: ifdef(`TARGET',`all',`biarchhf_archs') Priority: ifdef(`TARGET',`extra',PRI(optional)) Depends: BASEDEP, ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: libgfortran'FORTRAN_SO`-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgfortran'FORTRAN_SO`-armhf [biarchhf_archs]') Description: Runtime library for GNU Fortran applications (hard float ABI) Library needed for GNU Fortran applications linked against the shared library. @@ -1451,7 +1470,7 @@ Architecture: ifdef(`TARGET',`all',`biarchhf_archs') Priority: extra Depends: BASEDEP, libhfgfortran`'FORTRAN_SO`'LS (= ${gcc:Version}), ${misc:Depends} -Conflicts: libgfortran'FORTRAN_SO`-dbg-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgfortran'FORTRAN_SO`-dbg-armhf [biarchhf_archs]') Description: Runtime library for GNU Fortran applications (hard float ABI debug symbols) Library needed for GNU Fortran applications linked against the shared library. 
@@ -1463,7 +1482,7 @@ Architecture: ifdef(`TARGET',`all',`biarchsf_archs') Priority: ifdef(`TARGET',`extra',PRI(optional)) Depends: BASEDEP, ${dep:libcbiarch}, ${shlibs:Depends}, ${misc:Depends} -Conflicts: libgfortran'FORTRAN_SO`-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgfortran'FORTRAN_SO`-armel [biarchsf_archs]') Description: Runtime library for GNU Fortran applications (soft float ABI) Library needed for GNU Fortran applications linked against the shared library. @@ -1473,7 +1492,7 @@ Architecture: ifdef(`TARGET',`all',`biarchsf_archs') Priority: extra Depends: BASEDEP, libsfgfortran`'FORTRAN_SO`'LS (= ${gcc:Version}), ${misc:Depends} -Conflicts: libgfortran'FORTRAN_SO`-dbg-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libgfortran'FORTRAN_SO`-dbg-armel [biarchsf_archs]') Description: Runtime library for GNU Fortran applications (hard float ABI debug symbols) Library needed for GNU Fortran applications linked against the shared library. @@ -1485,6 +1504,7 @@ Architecture: NEON_ARCHS ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support +Breaks: ${multiarch:breaks} ')`'dnl Priority: extra Depends: BASEDEP, libgcc1-neon`'LS, ${shlibs:Depends}, ${misc:Depends} @@ -1539,7 +1559,7 @@ Package: libgo`'GO_SO`'LS Section: ifdef(`TARGET',`devel',`libs') Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support ')`Provides: libgo'GO_SO`-armel [armel], libgo'GO_SO`-armhf [armhf]') Priority: ifdef(`TARGET',`extra',PRI(optional)) @@ -1551,7 +1571,7 @@ Package: libgo`'GO_SO-dbg`'LS Section: debug Architecture: ifdef(`TARGET',`all',`any') -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same ')`Provides: libgo'GO_SO`-dbg-armel [armel], libgo'GO_SO`-dbg-armhf [armhf]') Priority: extra Depends: BASEDEP, libgo`'GO_SO`'LS (= ${gcc:Version}), 
${misc:Depends} @@ -1826,11 +1846,12 @@ Package: libstdc++CXX_SO`'LS Architecture: ifdef(`TARGET',`all',`any') Section: ifdef(`TARGET',`devel',`libs') -Priority: ifdef(`TARGET',`extra',PRI(required)) -Depends: BASEDEP, ${shlibs:Depends}, ${misc:Depends} -ifdef(`TARGET',`Provides: libstdc++CXX_SO-TARGET-dcv1 -',ifdef(`MULTIARCH', `Multi-Arch: same +Priority: ifdef(`TARGET',`extra',PRI(important)) +Depends: BASEDEP, ${dep:libc}, ${shlibs:Depends}, ${misc:Depends} +ifdef(`TARGET',`Provides: libstdc++CXX_SO-TARGET-dcv1', +ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support +Breaks: ${multiarch:breaks} ')`Provides: libstdc++'CXX_SO`-armel [armel], libstdc++'CXX_SO`-armhf [armhf]') Conflicts: scim (<< 1.4.2-1) Description: GNU Standard C++ Library v3`'ifdef(`TARGET)',` (TARGET)', `') @@ -1918,7 +1939,7 @@ Depends: BASEDEP, ${shlibs:Depends}, libhfgcc1`'LS, ${misc:Depends} ifdef(`TARGET',`Provides: libhfstdc++CXX_SO-TARGET-dcv1 ',`')`'dnl -Conflicts: libstdc++'CXX_SO`-armhf [biarchhf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libstdc++'CXX_SO`-armhf [biarchhf_archs]') Description: GNU Standard C++ Library v3`'ifdef(`TARGET)',` (TARGET)', `') (hard float ABI) This package contains an additional runtime library for C++ programs built with the GNU compiler. @@ -1941,7 +1962,7 @@ Depends: BASEDEP, ${shlibs:Depends}, libsfgcc1`'LS, ${misc:Depends} ifdef(`TARGET',`Provides: libsfstdc++CXX_SO-TARGET-dcv1 ',`')`'dnl -Conflicts: libstdc++'CXX_SO`-armel [biarchsf_archs] +ifdef(`TARGET',`dnl',`Conflicts: libstdc++'CXX_SO`-armel [biarchsf_archs]') Description: GNU Standard C++ Library v3`'ifdef(`TARGET)',` (TARGET)', `') (soft float ABI) This package contains an additional runtime library for C++ programs built with the GNU compiler. 
@@ -1980,9 +2001,7 @@ Conflicts: libg++27-dev, libg++272-dev (<< 2.7.2.8-1), libstdc++2.8-dev, libg++2.8-dev, libstdc++2.9-dev, libstdc++2.9-glibc2.1-dev, libstdc++2.10-dev (<< 1:2.95.3-2), libstdc++3.0-dev Suggests: libstdc++CXX_SO`'PV-doc ')`'dnl native -Provides: libstdc++-dev`'LS -ifdef(`TARGET',`, libstdc++-dev-TARGET-dcv1, libstdc++CXX_SO-dev-TARGET-dcv1 -')`'dnl +Provides: libstdc++-dev`'LS`'ifdef(`TARGET',`, libstdc++-dev-TARGET-dcv1, libstdc++CXX_SO-dev-TARGET-dcv1') Description: GNU Standard C++ Library v3 (development files)`'ifdef(`TARGET)',` (TARGET)', `') This package contains the headers and static library files necessary for building C++ programs which use libstdc++. @@ -2019,9 +2038,10 @@ Section: debug Priority: extra Depends: BASEDEP, libstdc++CXX_SO`'LS (>= ${gcc:Version}), libgcc`'GCC_SO-dbg`'LS, ${shlibs:Depends}, ${misc:Depends} -ifdef(`TARGET',`Provides: libstdc++CXX_SO-dbg-TARGET-dcv1 -',ifdef(`MULTIARCH', `Multi-Arch: same -')`Provides: libstdc++'CXX_SO`'PV`-dbg-armel [armel], libstdc++'CXX_SO`'PV`-dbg-armhf [armhf]') +ifdef(`TARGET',`Provides: libstdc++CXX_SO-dbg-TARGET-dcv1',`dnl +ifdef(`MULTIARCH', `Multi-Arch: same',`dnl') +Provides: libstdc++'CXX_SO`'PV`-dbg-armel [armel], libstdc++'CXX_SO`'PV`-dbg-armhf [armhf]dnl +') Recommends: libstdc++CXX_SO`'PV-dev`'LS (= ${gcc:Version}) Conflicts: libstdc++5-dbg`'LS, libstdc++5-3.3-dbg`'LS, libstdc++6-dbg`'LS, libstdc++6-4.0-dbg`'LS, libstdc++6-4.1-dbg`'LS, libstdc++6-4.2-dbg`'LS, libstdc++6-4.3-dbg`'LS, libstdc++6-4.4-dbg`'LS, libstdc++6-4.5-dbg`'LS Description: GNU Standard C++ Library v3 (debugging files)`'ifdef(`TARGET)',` (TARGET)', `') @@ -2084,7 +2104,7 @@ environment. 
')`'dnl -ifenabled(`libhfcxx',` +ifenabled(`libhfdbgcxx',` Package: libhfstdc++CXX_SO`'PV-dbg`'LS Architecture: ifdef(`TARGET',`all',`biarchhf_archs') Section: debug @@ -2092,7 +2112,7 @@ Depends: BASEDEP, libhfstdc++CXX_SO`'LS (>= ${gcc:Version}), libstdc++CXX_SO`'PV-dev`'LS (= ${gcc:Version}), libhfgcc`'GCC_SO-dbg`'LS, ${shlibs:Depends}, ${misc:Depends} ifdef(`TARGET',`Provides: libhfstdc++CXX_SO-dbg-TARGET-dcv1 ',`')`'dnl -Conflicts: libhfstdc++6-dbg`'LS, libhfstdc++6-4.3-dbg`'LS, libhfstdc++6-4.4-dbg`'LS, libhfstdc++6-4.5-dbg`'LS, libstdc++'CXX_SO`-armhf [biarchhf_archs] +Conflicts: libhfstdc++6-dbg`'LS, libhfstdc++6-4.3-dbg`'LS, libhfstdc++6-4.4-dbg`'LS, libhfstdc++6-4.5-dbg`'LS`'ifdef(`TARGET',`',`, libstdc++'CXX_SO`-armhf [biarchhf_archs]') Description: GNU Standard C++ Library v3 (debugging files)`'ifdef(`TARGET)',` (TARGET)', `') This package contains the shared library of libstdc++ compiled with debugging symbols. @@ -2101,9 +2121,9 @@ This package contains files for TARGET architecture, for use in cross-compile environment. 
')`'dnl -')`'dnl libhfcxx +')`'dnl libhfdbgcxx -ifenabled(`libsfcxx',` +ifenabled(`libsfdbgcxx',` Package: libsfstdc++CXX_SO`'PV-dbg`'LS Architecture: ifdef(`TARGET',`all',`biarchsf_archs') Section: debug @@ -2111,7 +2131,7 @@ Depends: BASEDEP, libsfstdc++CXX_SO`'LS (>= ${gcc:Version}), libstdc++CXX_SO`'PV-dev`'LS (= ${gcc:Version}), libsfgcc`'GCC_SO-dbg`'LS, ${shlibs:Depends}, ${misc:Depends} ifdef(`TARGET',`Provides: libsfstdc++CXX_SO-dbg-TARGET-dcv1 ',`')`'dnl -Conflicts: libsfstdc++6-dbg`'LS, libsfstdc++6-4.3-dbg`'LS, libsfstdc++6-4.4-dbg`'LS, libsfstdc++6-4.5-dbg`'LS, libstdc++'CXX_SO`-armel [biarchsf_archs] +Conflicts: libsfstdc++6-dbg`'LS, libsfstdc++6-4.3-dbg`'LS, libsfstdc++6-4.4-dbg`'LS, libsfstdc++6-4.5-dbg`'LS`'ifdef(`TARGET',`',`, libstdc++'CXX_SO`-armel [biarchsf_archs]') Description: GNU Standard C++ Library v3 (debugging files)`'ifdef(`TARGET)',` (TARGET)', `') This package contains the shared library of libstdc++ compiled with debugging symbols. @@ -2120,7 +2140,7 @@ This package contains files for TARGET architecture, for use in cross-compile environment. 
')`'dnl -')`'dnl libsfcxx +')`'dnl libsfdbgcxx ifdef(`TARGET', `', ` Package: libstdc++CXX_SO`'PV-doc @@ -2175,7 +2195,7 @@ Package: libgnat`'-GNAT_V Section: libs Architecture: any -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support '))`'dnl Priority: PRI(optional) @@ -2192,7 +2212,7 @@ Package: libgnat`'-GNAT_V-dbg Section: debug Architecture: any -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support '))`'dnl Priority: extra @@ -2210,7 +2230,7 @@ Section: libdevel Architecture: any Priority: extra -Depends: gnat`'PV-base (= ${gnat:Version}), gnat`'PV (= ${gnat:Version}), ada-compiler, +Depends: gnat`'PV-base (= ${gnat:Version}), gnat`'PV (= ${gnat:Version}), libgnatvsn`'GNAT_V (= ${gnat:Version}), ${misc:Depends} Conflicts: libgnatvsn-dev (<< `'GNAT_V), libgnatvsn4.1-dev, libgnatvsn4.3-dev, libgnatvsn4.4-dev, libgnatvsn4.5-dev Description: GNU Ada compiler selected components (development files) @@ -2225,7 +2245,7 @@ Package: libgnatvsn`'GNAT_V Architecture: any -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support '))`'dnl Priority: PRI(optional) @@ -2243,13 +2263,13 @@ Package: libgnatvsn`'GNAT_V-dbg Architecture: any -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support '))`'dnl Priority: extra Section: debug Depends: gnat`'PV-base (= ${gnat:Version}), libgnatvsn`'GNAT_V (= ${gnat:Version}), ${misc:Depends} -Suggests: gnat, ada-compiler +Suggests: gnat Description: GNU Ada compiler selected components (debugging symbols) GNAT is a compiler for the Ada programming language. It produces optimized code on platforms supported by the GNU Compiler Collection (GCC). 
@@ -2264,7 +2284,7 @@ Section: libdevel Architecture: any Priority: extra -Depends: gnat`'PV-base (= ${gnat:Version}), gnat`'PV (= ${gnat:Version}), ada-compiler, +Depends: gnat`'PV-base (= ${gnat:Version}), gnat`'PV (= ${gnat:Version}), libgnatprj`'GNAT_V (= ${gnat:Version}), libgnatvsn`'GNAT_V-dev (= ${gnat:Version}), ${misc:Depends} Conflicts: libgnatprj-dev (<< `'GNAT_V), libgnatprj4.1-dev, libgnatprj4.3-dev, libgnatprj4.4-dev, libgnatprj4.5-dev Description: GNU Ada compiler Project Manager (development files) @@ -2282,7 +2302,7 @@ Package: libgnatprj`'GNAT_V Architecture: any -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support '))`'dnl Priority: PRI(optional) @@ -2303,13 +2323,13 @@ Package: libgnatprj`'GNAT_V-dbg Architecture: any -ifdef(`TARGET',`',ifdef(`MULTIARCH', `Multi-Arch: same +ifdef(`TARGET',`dnl',ifdef(`MULTIARCH', `Multi-Arch: same Pre-Depends: multiarch-support '))`'dnl Priority: extra Section: debug Depends: gnat`'PV-base (= ${gnat:Version}), libgnatprj`'GNAT_V (= ${gnat:Version}), ${misc:Depends} -Suggests: gnat, ada-compiler +Suggests: gnat Description: GNU Ada compiler Project Manager (debugging symbols) GNAT is a compiler for the Ada programming language. It produces optimized code on platforms supported by the GNU Compiler Collection (GCC). 
diff -Nru gcc-4.6-4.6.2/debian/libgcc1.symbols.powerpcspe gcc-4.6-4.6.4/debian/libgcc1.symbols.powerpcspe --- gcc-4.6-4.6.2/debian/libgcc1.symbols.powerpcspe 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/libgcc1.symbols.powerpcspe 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,139 @@ +libgcc_s.so.1 libgcc1 #MINVER# + GCC_3.0@GCC_3.0 1:4.1.1 + GCC_3.3.1@GCC_3.3.1 1:4.1.1 + GCC_3.3.4@GCC_3.3.4 1:4.1.1 + GCC_3.3@GCC_3.3 1:4.1.1 + GCC_3.4.2@GCC_3.4.2 1:4.1.1 + GCC_3.4@GCC_3.4 1:4.1.1 + GCC_4.0.0@GCC_4.0.0 1:4.1.1 + GCC_4.1.0@GCC_4.1.0 1:4.1.1 + GCC_4.2.0@GCC_4.2.0 1:4.1.1 + GCC_4.3.0@GCC_4.3.0 1:4.3 + GLIBC_2.0@GLIBC_2.0 1:4.1.1 + _Unwind_Backtrace@GCC_3.3 1:4.1.1 + _Unwind_DeleteException@GCC_3.0 1:4.1.1 + _Unwind_FindEnclosingFunction@GCC_3.3 1:4.1.1 + _Unwind_Find_FDE@GCC_3.0 1:4.1.1 + _Unwind_ForcedUnwind@GCC_3.0 1:4.1.1 + _Unwind_GetCFA@GCC_3.3 1:4.1.1 + _Unwind_GetDataRelBase@GCC_3.0 1:4.1.1 + _Unwind_GetGR@GCC_3.0 1:4.1.1 + _Unwind_GetIP@GCC_3.0 1:4.1.1 + _Unwind_GetIPInfo@GCC_4.2.0 1:4.1.1 + _Unwind_GetLanguageSpecificData@GCC_3.0 1:4.1.1 + _Unwind_GetRegionStart@GCC_3.0 1:4.1.1 + _Unwind_GetTextRelBase@GCC_3.0 1:4.1.1 + _Unwind_RaiseException@GCC_3.0 1:4.1.1 + _Unwind_Resume@GCC_3.0 1:4.1.1 + _Unwind_Resume_or_Rethrow@GCC_3.3 1:4.1.1 + _Unwind_SetGR@GCC_3.0 1:4.1.1 + _Unwind_SetIP@GCC_3.0 1:4.1.1 + __absvdi2@GCC_3.0 1:4.1.1 + __absvsi2@GCC_3.0 1:4.1.1 + __adddf3@GCC_3.0 1:4.1.1 + __addsf3@GCC_3.0 1:4.1.1 + __addvdi3@GCC_3.0 1:4.1.1 + __addvsi3@GCC_3.0 1:4.1.1 + __ashldi3@GCC_3.0 1:4.1.1 + __ashrdi3@GCC_3.0 1:4.1.1 + __bswapdi2@GCC_4.3.0 1:4.3 + __bswapsi2@GCC_4.3.0 1:4.3 + __clear_cache@GCC_3.0 1:4.1.1 + __clzdi2@GCC_3.4 1:4.1.1 + __clzsi2@GCC_3.4 1:4.1.1 + __cmpdi2@GCC_3.0 1:4.1.1 + __ctzdi2@GCC_3.4 1:4.1.1 + __ctzsi2@GCC_3.4 1:4.1.1 + __deregister_frame@GLIBC_2.0 1:4.1.1 + __deregister_frame_info@GLIBC_2.0 1:4.1.1 + __deregister_frame_info_bases@GCC_3.0 1:4.1.1 + __divdc3@GCC_4.0.0 1:4.1.1 + __divdf3@GCC_3.0 1:4.1.1 + __divdi3@GLIBC_2.0 
1:4.1.1 + __divsc3@GCC_4.0.0 1:4.1.1 + __divsf3@GCC_3.0 1:4.1.1 + __divtc3@GCC_4.1.0 1:4.1.1 + __emutls_get_address@GCC_4.3.0 1:4.3 + __emutls_register_common@GCC_4.3.0 1:4.3 + __enable_execute_stack@GCC_3.4.2 1:4.1.1 + __eqdf2@GCC_3.0 1:4.1.1 + __eqsf2@GCC_3.0 1:4.1.1 + __extendsfdf2@GCC_3.0 1:4.1.1 + __ffsdi2@GCC_3.0 1:4.1.1 + __ffssi2@GCC_4.3.0 1:4.3 + __fixdfdi@GCC_3.0 1:4.1.1 + __fixdfsi@GCC_3.0 1:4.1.1 + __fixsfdi@GCC_3.0 1:4.1.1 + __fixsfsi@GCC_3.0 1:4.1.1 + __fixtfdi@GCC_4.1.0 1:4.1.1 + __fixunsdfdi@GCC_3.0 1:4.1.1 + __fixunsdfsi@GCC_3.0 1:4.1.1 + __fixunssfdi@GCC_3.0 1:4.1.1 + __fixunssfsi@GCC_3.0 1:4.1.1 + __fixunstfdi@GCC_4.1.0 1:4.1.1 + __floatdidf@GCC_3.0 1:4.1.1 + __floatdisf@GCC_3.0 1:4.1.1 + __floatditf@GCC_4.1.0 1:4.1.1 + __floatsidf@GCC_3.0 1:4.1.1 + __floatsisf@GCC_3.0 1:4.1.1 + __floatundidf@GCC_4.2.0 1:4.2.1 + __floatundisf@GCC_4.2.0 1:4.2.1 + __floatunditf@GCC_4.2.0 1:4.2.1 + __floatunsidf@GCC_4.2.0 1:4.2.1 + __floatunsisf@GCC_4.2.0 1:4.2.1 + __frame_state_for@GLIBC_2.0 1:4.1.1 + __gcc_personality_v0@GCC_3.3.1 1:4.1.1 + __gcc_qadd@GCC_4.1.0 1:4.1.1 + __gcc_qdiv@GCC_4.1.0 1:4.1.1 + __gcc_qmul@GCC_4.1.0 1:4.1.1 + __gcc_qsub@GCC_4.1.0 1:4.1.1 + __gedf2@GCC_3.0 1:4.1.1 + __gesf2@GCC_3.0 1:4.1.1 + __gtdf2@GCC_3.0 1:4.1.1 + __gtsf2@GCC_3.0 1:4.1.1 + __ledf2@GCC_3.0 1:4.1.1 + __lesf2@GCC_3.0 1:4.1.1 + __lshrdi3@GCC_3.0 1:4.1.1 + __ltdf2@GCC_3.0 1:4.1.1 + __ltsf2@GCC_3.0 1:4.1.1 + __moddi3@GLIBC_2.0 1:4.1.1 + __muldc3@GCC_4.0.0 1:4.1.1 + __muldf3@GCC_3.0 1:4.1.1 + __muldi3@GCC_3.0 1:4.1.1 + __mulsc3@GCC_4.0.0 1:4.1.1 + __mulsf3@GCC_3.0 1:4.1.1 + __multc3@GCC_4.1.0 1:4.1.1 + __mulvdi3@GCC_3.0 1:4.1.1 + __mulvsi3@GCC_3.0 1:4.1.1 + __nedf2@GCC_3.0 1:4.1.1 + __negdf2@GCC_3.0 1:4.1.1 + __negdi2@GCC_3.0 1:4.1.1 + __negsf2@GCC_3.0 1:4.1.1 + __negvdi2@GCC_3.0 1:4.1.1 + __negvsi2@GCC_3.0 1:4.1.1 + __nesf2@GCC_3.0 1:4.1.1 + __paritydi2@GCC_3.4 1:4.1.1 + __paritysi2@GCC_3.4 1:4.1.1 + __popcountdi2@GCC_3.4 1:4.1.1 + __popcountsi2@GCC_3.4 1:4.1.1 + 
__powidf2@GCC_4.0.0 1:4.1.1 + __powisf2@GCC_4.0.0 1:4.1.1 + __powitf2@GCC_4.1.0 1:4.1.1 + __register_frame@GLIBC_2.0 1:4.1.1 + __register_frame_info@GLIBC_2.0 1:4.1.1 + __register_frame_info_bases@GCC_3.0 1:4.1.1 + __register_frame_info_table@GLIBC_2.0 1:4.1.1 + __register_frame_info_table_bases@GCC_3.0 1:4.1.1 + __register_frame_table@GLIBC_2.0 1:4.1.1 + __subdf3@GCC_3.0 1:4.1.1 + __subsf3@GCC_3.0 1:4.1.1 + __subvdi3@GCC_3.0 1:4.1.1 + __subvsi3@GCC_3.0 1:4.1.1 + __trampoline_setup@GCC_3.4.2 1:4.1.1 + __truncdfsf2@GCC_3.0 1:4.1.1 + __ucmpdi2@GCC_3.0 1:4.1.1 + __udivdi3@GLIBC_2.0 1:4.1.1 + __udivmoddi4@GCC_3.0 1:4.1.1 + __umoddi3@GLIBC_2.0 1:4.1.1 + __unorddf2@GCC_3.3.4 1:4.1.1 + __unordsf2@GCC_3.3.4 1:4.1.1 diff -Nru gcc-4.6-4.6.2/debian/libgfortran3.symbols.16.powerpcspe gcc-4.6-4.6.4/debian/libgfortran3.symbols.16.powerpcspe --- gcc-4.6-4.6.2/debian/libgfortran3.symbols.16.powerpcspe 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/libgfortran3.symbols.16.powerpcspe 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,100 @@ + __iso_c_binding_c_f_pointer_c16@GFORTRAN_1.0 4.3 + __iso_c_binding_c_f_pointer_r16@GFORTRAN_1.0 4.3 + _gfortran_arandom_r16@GFORTRAN_1.0 4.3 + _gfortran_bessel_jn_r16@GFORTRAN_1.4 4.6 + _gfortran_bessel_yn_r16@GFORTRAN_1.4 4.6 + _gfortran_cpu_time_16@GFORTRAN_1.0 4.3 + _gfortran_erfc_scaled_r16@GFORTRAN_1.1 4.4.0 + _gfortran_exponent_r16@GFORTRAN_1.0 4.3 + _gfortran_fraction_r16@GFORTRAN_1.0 4.3 + _gfortran_matmul_c16@GFORTRAN_1.0 4.3 + _gfortran_matmul_r16@GFORTRAN_1.0 4.3 + _gfortran_maxloc0_4_r16@GFORTRAN_1.0 4.3 + _gfortran_maxloc0_8_r16@GFORTRAN_1.0 4.3 + _gfortran_maxloc1_4_r16@GFORTRAN_1.0 4.3 + _gfortran_maxloc1_8_r16@GFORTRAN_1.0 4.3 + _gfortran_maxval_r16@GFORTRAN_1.0 4.3 + _gfortran_minloc0_4_r16@GFORTRAN_1.0 4.3 + _gfortran_minloc0_8_r16@GFORTRAN_1.0 4.3 + _gfortran_minloc1_4_r16@GFORTRAN_1.0 4.3 + _gfortran_minloc1_8_r16@GFORTRAN_1.0 4.3 + _gfortran_minval_r16@GFORTRAN_1.0 4.3 + _gfortran_mmaxloc0_4_r16@GFORTRAN_1.0 4.3 + 
_gfortran_mmaxloc0_8_r16@GFORTRAN_1.0 4.3 + _gfortran_mmaxloc1_4_r16@GFORTRAN_1.0 4.3 + _gfortran_mmaxloc1_8_r16@GFORTRAN_1.0 4.3 + _gfortran_mmaxval_r16@GFORTRAN_1.0 4.3 + _gfortran_mminloc0_4_r16@GFORTRAN_1.0 4.3 + _gfortran_mminloc0_8_r16@GFORTRAN_1.0 4.3 + _gfortran_mminloc1_4_r16@GFORTRAN_1.0 4.3 + _gfortran_mminloc1_8_r16@GFORTRAN_1.0 4.3 + _gfortran_mminval_r16@GFORTRAN_1.0 4.3 + _gfortran_mproduct_c16@GFORTRAN_1.0 4.3 + _gfortran_mproduct_r16@GFORTRAN_1.0 4.3 + _gfortran_msum_c16@GFORTRAN_1.0 4.3 + _gfortran_msum_r16@GFORTRAN_1.0 4.3 + _gfortran_nearest_r16@GFORTRAN_1.0 4.3 + _gfortran_norm2_r16@GFORTRAN_1.4 4.6 + _gfortran_pow_c16_i4@GFORTRAN_1.0 4.3 + _gfortran_pow_c16_i8@GFORTRAN_1.0 4.3 + _gfortran_pow_r16_i4@GFORTRAN_1.0 4.6 + _gfortran_pow_r16_i8@GFORTRAN_1.0 4.3 + _gfortran_product_c16@GFORTRAN_1.0 4.3 + _gfortran_product_r16@GFORTRAN_1.0 4.3 + _gfortran_random_r16@GFORTRAN_1.0 4.3 + _gfortran_reshape_c16@GFORTRAN_1.0 4.3 + _gfortran_reshape_r16@GFORTRAN_1.0 4.3 + _gfortran_rrspacing_r16@GFORTRAN_1.0 4.3 + _gfortran_set_exponent_r16@GFORTRAN_1.0 4.3 + _gfortran_smaxloc0_4_r16@GFORTRAN_1.0 4.3 + _gfortran_smaxloc0_8_r16@GFORTRAN_1.0 4.3 + _gfortran_smaxloc1_4_r16@GFORTRAN_1.0 4.3 + _gfortran_smaxloc1_8_r16@GFORTRAN_1.0 4.3 + _gfortran_smaxval_r16@GFORTRAN_1.0 4.3 + _gfortran_sminloc0_4_r16@GFORTRAN_1.0 4.3 + _gfortran_sminloc0_8_r16@GFORTRAN_1.0 4.3 + _gfortran_sminloc1_4_r16@GFORTRAN_1.0 4.3 + _gfortran_sminloc1_8_r16@GFORTRAN_1.0 4.3 + _gfortran_sminval_r16@GFORTRAN_1.0 4.3 + _gfortran_spacing_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__abs_c16@GFORTRAN_1.0 4.3 + _gfortran_specific__abs_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__acos_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__acosh_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__aimag_c16@GFORTRAN_1.0 4.3 + _gfortran_specific__aint_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__anint_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__asin_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__asinh_r16@GFORTRAN_1.0 4.3 + 
_gfortran_specific__atan2_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__atan_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__atanh_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__conjg_16@GFORTRAN_1.0 4.3 + _gfortran_specific__cos_c16@GFORTRAN_1.0 4.3 + _gfortran_specific__cos_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__cosh_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__dim_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__exp_c16@GFORTRAN_1.0 4.3 + _gfortran_specific__exp_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__log10_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__log_c16@GFORTRAN_1.0 4.3 + _gfortran_specific__log_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__mod_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__nint_4_16@GFORTRAN_1.0 4.3 + _gfortran_specific__nint_8_16@GFORTRAN_1.0 4.3 + _gfortran_specific__sign_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__sin_c16@GFORTRAN_1.0 4.3 + _gfortran_specific__sin_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__sinh_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__sqrt_c16@GFORTRAN_1.0 4.3 + _gfortran_specific__sqrt_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__tan_r16@GFORTRAN_1.0 4.3 + _gfortran_specific__tanh_r16@GFORTRAN_1.0 4.3 + _gfortran_sproduct_c16@GFORTRAN_1.0 4.3 + _gfortran_sproduct_r16@GFORTRAN_1.0 4.3 + _gfortran_ssum_c16@GFORTRAN_1.0 4.3 + _gfortran_ssum_r16@GFORTRAN_1.0 4.3 + _gfortran_sum_c16@GFORTRAN_1.0 4.3 + _gfortran_sum_r16@GFORTRAN_1.0 4.3 + _gfortran_transpose_c16@GFORTRAN_1.0 4.3 + _gfortran_transpose_r16@GFORTRAN_1.0 4.3 diff -Nru gcc-4.6-4.6.2/debian/libgfortran3.symbols.powerpcspe gcc-4.6-4.6.4/debian/libgfortran3.symbols.powerpcspe --- gcc-4.6-4.6.2/debian/libgfortran3.symbols.powerpcspe 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/libgfortran3.symbols.powerpcspe 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,3 @@ +libgfortran.so.3 libgfortran3 #MINVER# +#include "libgfortran3.symbols.common" +#include "libgfortran3.symbols.16.powerpc" diff -Nru gcc-4.6-4.6.2/debian/libstdc++6.symbols.32bit 
gcc-4.6-4.6.4/debian/libstdc++6.symbols.32bit --- gcc-4.6-4.6.2/debian/libstdc++6.symbols.32bit 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/libstdc++6.symbols.32bit 2013-04-14 23:00:34.000000000 +0000 @@ -325,7 +325,7 @@ _ZNSt14collate_bynameIcEC2EPKcj@GLIBCXX_3.4 4.1.1 _ZNSt14collate_bynameIwEC1EPKcj@GLIBCXX_3.4 4.1.1 _ZNSt14collate_bynameIwEC2EPKcj@GLIBCXX_3.4 4.1.1 - (arch=!powerpc !ppc64 !sparc)_ZNSt14numeric_limitsIeE12max_digits10E@GLIBCXX_3.4.14 4.5.0 + (arch=!powerpc !powerpcspe !ppc64 !sparc)_ZNSt14numeric_limitsIeE12max_digits10E@GLIBCXX_3.4.14 4.5.0 _ZNSt15basic_streambufIcSt11char_traitsIcEE10pubseekoffExSt12_Ios_SeekdirSt13_Ios_Openmode@GLIBCXX_3.4 4.1.1 _ZNSt15basic_streambufIcSt11char_traitsIcEE12__safe_gbumpEi@GLIBCXX_3.4.16 4.6.0 _ZNSt15basic_streambufIcSt11char_traitsIcEE12__safe_pbumpEi@GLIBCXX_3.4.16 4.6.0 diff -Nru gcc-4.6-4.6.2/debian/libstdc++6.symbols.64bit gcc-4.6-4.6.4/debian/libstdc++6.symbols.64bit --- gcc-4.6-4.6.2/debian/libstdc++6.symbols.64bit 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/libstdc++6.symbols.64bit 2013-04-14 23:00:34.000000000 +0000 @@ -323,7 +323,7 @@ _ZNSt14collate_bynameIcEC2EPKcm@GLIBCXX_3.4 4.1.1 _ZNSt14collate_bynameIwEC1EPKcm@GLIBCXX_3.4 4.1.1 _ZNSt14collate_bynameIwEC2EPKcm@GLIBCXX_3.4 4.1.1 - (arch=!alpha !powerpc !ppc64 !s390)_ZNSt14numeric_limitsIeE12max_digits10E@GLIBCXX_3.4.14 4.5.0 + (arch=!alpha !powerpc !powerpcspe !ppc64 !s390)_ZNSt14numeric_limitsIeE12max_digits10E@GLIBCXX_3.4.14 4.5.0 _ZNSt15basic_streambufIcSt11char_traitsIcEE10pubseekoffElSt12_Ios_SeekdirSt13_Ios_Openmode@GLIBCXX_3.4 4.1.1 _ZNSt15basic_streambufIcSt11char_traitsIcEE12__safe_gbumpEl@GLIBCXX_3.4.16 4.6.0 _ZNSt15basic_streambufIcSt11char_traitsIcEE12__safe_pbumpEl@GLIBCXX_3.4.16 4.6.0 diff -Nru gcc-4.6-4.6.2/debian/libstdc++6.symbols.powerpcspe gcc-4.6-4.6.4/debian/libstdc++6.symbols.powerpcspe --- gcc-4.6-4.6.2/debian/libstdc++6.symbols.powerpcspe 1970-01-01 00:00:00.000000000 +0000 +++ 
gcc-4.6-4.6.4/debian/libstdc++6.symbols.powerpcspe 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,8 @@ +libstdc++.so.6 libstdc++6 #MINVER# +#include "libstdc++6.symbols.32bit" +#include "libstdc++6.symbols.excprop" + __gxx_personality_v0@CXXABI_1.3 4.1.1 +#include "libstdc++6.symbols.glibcxxmath" +#include "libstdc++6.symbols.ldbl.32bit" + _ZNKSt3tr14hashIeEclEe@GLIBCXX_3.4.10 4.3.0~rc2 + _ZNKSt4hashIeEclEe@GLIBCXX_3.4.10 4.3.0~rc2 diff -Nru gcc-4.6-4.6.2/debian/patches/ada-bug564232.diff gcc-4.6-4.6.4/debian/patches/ada-bug564232.diff --- gcc-4.6-4.6.2/debian/patches/ada-bug564232.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/ada-bug564232.diff 2013-04-14 23:00:34.000000000 +0000 @@ -5,7 +5,7 @@ #endif -#if defined (__FreeBSD__) || defined (__vxworks) || defined(__rtems__) -+#if defined (__FreeBSD__) || defined (__FreeBSD_kernel__) || defined (__vxworks) || defined(__rtems__) ++#if defined (__FreeBSD__) || defined (__FreeBSD_kernel__) || defined (__vxworks) || defined(__rtems__) || defined(__GNU__) # define Has_Sockaddr_Len 1 #else # define Has_Sockaddr_Len 0 diff -Nru gcc-4.6-4.6.2/debian/patches/ada-kfreebsd-gnu.diff gcc-4.6-4.6.4/debian/patches/ada-kfreebsd-gnu.diff --- gcc-4.6-4.6.2/debian/patches/ada-kfreebsd-gnu.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/ada-kfreebsd-gnu.diff 2013-04-14 23:00:34.000000000 +0000 @@ -183,6 +183,24 @@ s-osinte.ads S, ++ tv_nsec => long (Long_Long_Integer (F * 10#1#E9))); ++ end To_Timespec; ++ ++end System.OS_Interface; diff -Nru gcc-4.6-4.6.2/debian/patches/ada-s-osinte-gnu.ads.diff gcc-4.6-4.6.4/debian/patches/ada-s-osinte-gnu.ads.diff --- gcc-4.6-4.6.2/debian/patches/ada-s-osinte-gnu.ads.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/ada-s-osinte-gnu.ads.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,753 @@ +--- /dev/null 2012-01-30 20:41:15.189616186 +0100 ++++ b/src/gcc/ada/s-osinte-gnu.ads 2012-04-11 19:34:45.000000000 +0200 +@@ 
-0,0 +1,750 @@ ++------------------------------------------------------------------------------ ++-- -- ++-- GNU ADA RUN-TIME LIBRARY (GNARL) COMPONENTS -- ++-- -- ++-- S Y S T E M . O S _ I N T E R F A C E -- ++-- -- ++-- S p e c -- ++-- -- ++-- Copyright (C) 1991-1994, Florida State University -- ++-- Copyright (C) 1995-2011, Free Software Foundation, Inc. -- ++-- -- ++-- GNARL is free software; you can redistribute it and/or modify it under -- ++-- terms of the GNU General Public License as published by the Free Soft- -- ++-- ware Foundation; either version 2, or (at your option) any later ver- -- ++-- sion. GNARL is distributed in the hope that it will be useful, but WITH- -- ++-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- ++-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- ++-- for more details. You should have received a copy of the GNU General -- ++-- Public License distributed with GNARL; see file COPYING. If not, write -- ++-- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, -- ++-- Boston, MA 02110-1301, USA. -- ++-- -- ++-- As a special exception, if other files instantiate generics from this -- ++-- unit, or you link this unit with other files to produce an executable, -- ++-- this unit does not by itself cause the resulting executable to be -- ++-- covered by the GNU General Public License. This exception does not -- ++-- however invalidate any other reasons why the executable file might be -- ++-- covered by the GNU Public License. -- ++-- -- ++-- GNARL was developed by the GNARL team at Florida State University. -- ++-- Extensive contributions were provided by Ada Core Technologies, Inc. -- ++-- -- ++------------------------------------------------------------------------------ ++ ++-- This is the GNU/Hurd version of this package ++ ++-- This package encapsulates all direct interfaces to OS services ++-- that are needed by children of System. 
++ ++-- PLEASE DO NOT add any with-clauses to this package or remove the pragma ++-- Preelaborate. This package is designed to be a bottom-level (leaf) package ++ ++with Interfaces.C; ++with Unchecked_Conversion; ++ ++package System.OS_Interface is ++ pragma Preelaborate; ++ ++ pragma Linker_Options ("-lpthread"); ++ pragma Linker_Options ("-lrt"); ++ ++ subtype int is Interfaces.C.int; ++ subtype char is Interfaces.C.char; ++ subtype short is Interfaces.C.short; ++ subtype long is Interfaces.C.long; ++ subtype unsigned is Interfaces.C.unsigned; ++ subtype unsigned_short is Interfaces.C.unsigned_short; ++ subtype unsigned_long is Interfaces.C.unsigned_long; ++ subtype unsigned_char is Interfaces.C.unsigned_char; ++ subtype plain_char is Interfaces.C.plain_char; ++ subtype size_t is Interfaces.C.size_t; ++ ++ ----------- ++ -- Errno -- ++ ----------- ++ -- From /usr/include/i386-gnu/bits/errno.h ++ ++ function errno return int; ++ pragma Import (C, errno, "__get_errno"); ++ ++ EAGAIN : constant := 1073741859; ++ EINTR : constant := 1073741828; ++ EINVAL : constant := 1073741846; ++ ENOMEM : constant := 1073741836; ++ EPERM : constant := 1073741825; ++ ETIMEDOUT : constant := 1073741884; ++ ++ ------------- ++ -- Signals -- ++ ------------- ++ -- From /usr/include/i386-gnu/bits/signum.h ++ ++ Max_Interrupt : constant := 32; ++ type Signal is new int range 0 .. 
Max_Interrupt; ++ for Signal'Size use int'Size; ++ ++ SIGHUP : constant := 1; -- hangup ++ SIGINT : constant := 2; -- interrupt (rubout) ++ SIGQUIT : constant := 3; -- quit (ASCD FS) ++ SIGILL : constant := 4; -- illegal instruction (not reset) ++ SIGTRAP : constant := 5; -- trace trap (not reset) ++ SIGIOT : constant := 6; -- IOT instruction ++ SIGABRT : constant := 6; -- used by abort, replace SIGIOT in the future ++ SIGEMT : constant := 7; -- EMT instruction ++ SIGFPE : constant := 8; -- floating point exception ++ SIGKILL : constant := 9; -- kill (cannot be caught or ignored) ++ SIGBUS : constant := 10; -- bus error ++ SIGSEGV : constant := 11; -- segmentation violation ++ SIGSYS : constant := 12; -- bad argument to system call ++ SIGPIPE : constant := 13; -- write on a pipe with no one to read it ++ SIGALRM : constant := 14; -- alarm clock ++ SIGTERM : constant := 15; -- software termination signal from kill ++ SIGURG : constant := 16; -- urgent condition on IO channel ++ SIGSTOP : constant := 17; -- stop (cannot be caught or ignored) ++ SIGTSTP : constant := 18; -- user stop requested from tty ++ SIGCONT : constant := 19; -- stopped process has been continued ++ SIGCLD : constant := 20; -- alias for SIGCHLD ++ SIGCHLD : constant := 20; -- child status change ++ SIGTTIN : constant := 21; -- background tty read attempted ++ SIGTTOU : constant := 22; -- background tty write attempted ++ SIGIO : constant := 23; -- I/O possible (Solaris SIGPOLL alias) ++ SIGPOLL : constant := 23; -- I/O possible (same as SIGIO?) 
++ SIGXCPU : constant := 24; -- CPU time limit exceeded ++ SIGXFSZ : constant := 25; -- filesize limit exceeded ++ SIGVTALRM : constant := 26; -- virtual timer expired ++ SIGPROF : constant := 27; -- profiling timer expired ++ SIGWINCH : constant := 28; -- window size change ++ SIGINFO : constant := 29; -- information request (NetBSD/FreeBSD) ++ SIGUSR1 : constant := 30; -- user defined signal 1 ++ SIGUSR2 : constant := 31; -- user defined signal 2 ++ SIGLOST : constant := 32; -- Resource lost (Sun); server died (GNU) ++-- SIGLTHRRES : constant := 32; -- GNU/LinuxThreads restart signal ++-- SIGLTHRCAN : constant := 33; -- GNU/LinuxThreads cancel signal ++-- SIGLTHRDBG : constant := 34; -- GNU/LinuxThreads debugger signal ++ ++ SIGADAABORT : constant := SIGABRT; ++ -- Change this if you want to use another signal for task abort. ++ -- SIGTERM might be a good one. ++ ++ type Signal_Set is array (Natural range <>) of Signal; ++ ++ Unmasked : constant Signal_Set := ( ++ SIGTRAP, ++ -- To enable debugging on multithreaded applications, mark SIGTRAP to ++ -- be kept unmasked. ++ ++ SIGBUS, ++ ++ SIGTTIN, SIGTTOU, SIGTSTP, ++ -- Keep these three signals unmasked so that background processes ++ -- and IO behaves as normal "C" applications ++ ++ SIGPROF, ++ -- To avoid confusing the profiler ++ ++ SIGKILL, SIGSTOP); ++ -- These two signals actually cannot be masked; ++ -- POSIX simply won't allow it. ++ ++ Reserved : constant Signal_Set := ++ -- I am not sure why the following signal is reserved. ++ -- I guess they are not supported by this version of GNU/Hurd. ++ (0 .. 
0 => SIGVTALRM); ++ ++ type sigset_t is private; ++ ++ -- From /usr/include/signal.h /usr/include/i386-gnu/bits/sigset.h ++ function sigaddset (set : access sigset_t; sig : Signal) return int; ++ pragma Import (C, sigaddset, "sigaddset"); ++ ++ function sigdelset (set : access sigset_t; sig : Signal) return int; ++ pragma Import (C, sigdelset, "sigdelset"); ++ ++ function sigfillset (set : access sigset_t) return int; ++ pragma Import (C, sigfillset, "sigfillset"); ++ ++ function sigismember (set : access sigset_t; sig : Signal) return int; ++ pragma Import (C, sigismember, "sigismember"); ++ ++ function sigemptyset (set : access sigset_t) return int; ++ pragma Import (C, sigemptyset, "sigemptyset"); ++ ++ -- sigcontext is architecture dependent, so define it private ++ type struct_sigcontext is private; ++ ++ -- From /usr/include/i386-gnu/bits/sigaction.h: Note: arg. order differs ++ type struct_sigaction is record ++ sa_handler : System.Address; ++ sa_mask : sigset_t; ++ sa_flags : int; ++ end record; ++ pragma Convention (C, struct_sigaction); ++ ++ type struct_sigaction_ptr is access all struct_sigaction; ++ ++ -- From /usr/include/i386-gnu/bits/sigaction.h ++ SIG_BLOCK : constant := 1; ++ SIG_UNBLOCK : constant := 2; ++ SIG_SETMASK : constant := 3; ++ ++ -- From /usr/include/i386-gnu/bits/signum.h ++ SIG_ERR : constant := 1; ++ SIG_DFL : constant := 0; ++ SIG_IGN : constant := 1; ++ SIG_HOLD : constant := 2; ++ ++ -- From /usr/include/i386-gnu/bits/sigaction.h ++ SA_SIGINFO : constant := 16#0040#; ++ SA_ONSTACK : constant := 16#0001#; ++ ++ function sigaction ++ (sig : Signal; ++ act : struct_sigaction_ptr; ++ oact : struct_sigaction_ptr) return int; ++ pragma Import (C, sigaction, "sigaction"); ++ ++ ---------- ++ -- Time -- ++ ---------- ++ ++ Time_Slice_Supported : constant Boolean := True; ++ -- Indicates whether time slicing is supported (i.e SCHED_RR is supported) ++ ++ type timespec is private; ++ ++ function nanosleep (rqtp, rmtp : access timespec) 
return int; ++ pragma Import (C, nanosleep, "nanosleep"); ++ ++ type clockid_t is private; ++ ++ CLOCK_REALTIME : constant clockid_t; ++ ++ -- From: /usr/include/time.h ++ function clock_gettime ++ (clock_id : clockid_t; ++ tp : access timespec) ++ return int; ++ pragma Import (C, clock_gettime, "clock_gettime"); ++ ++ function To_Duration (TS : timespec) return Duration; ++ pragma Inline (To_Duration); ++ ++ function To_Timespec (D : Duration) return timespec; ++ pragma Inline (To_Timespec); ++ ++ -- From: /usr/include/unistd.h ++ function sysconf (name : int) return long; ++ pragma Import (C, sysconf); ++ ++ -- From /usr/include/i386-gnu/bits/confname.h ++ SC_CLK_TCK : constant := 2; ++ SC_NPROCESSORS_ONLN : constant := 84; ++ ++ ------------------------- ++ -- Priority Scheduling -- ++ ------------------------- ++ -- From /usr/include/i386-gnu/bits/sched.h ++ ++ SCHED_OTHER : constant := 0; ++ SCHED_FIFO : constant := 1; ++ SCHED_RR : constant := 2; ++ ++ function To_Target_Priority ++ (Prio : System.Any_Priority) return Interfaces.C.int; ++ -- Maps System.Any_Priority to a POSIX priority. ++ ++ ------------- ++ -- Process -- ++ ------------- ++ ++ type pid_t is private; ++ ++ -- From: /usr/include/signal.h ++ function kill (pid : pid_t; sig : Signal) return int; ++ pragma Import (C, kill, "kill"); ++ ++ -- From: /usr/include/unistd.h ++ function getpid return pid_t; ++ pragma Import (C, getpid, "getpid"); ++ ++ --------- ++ -- LWP -- ++ --------- ++ ++ -- From: /usr/include/pthread/pthread.h ++ function lwp_self return System.Address; ++ -- lwp_self does not exist on this thread library, revert to pthread_self ++ -- which is the closest approximation (with getpid). This function is ++ -- needed to share 7staprop.adb across POSIX-like targets. 
++ pragma Import (C, lwp_self, "pthread_self"); ++ ++ ------------- ++ -- Threads -- ++ ------------- ++ ++ type Thread_Body is access ++ function (arg : System.Address) return System.Address; ++ pragma Convention (C, Thread_Body); ++ ++ function Thread_Body_Access is new ++ Unchecked_Conversion (System.Address, Thread_Body); ++ ++ -- From: /usr/include/bits/pthread.h:typedef int __pthread_t; ++ -- /usr/include/pthread/pthreadtypes.h:typedef __pthread_t pthread_t; ++ type pthread_t is new unsigned_long; ++ subtype Thread_Id is pthread_t; ++ ++ function To_pthread_t is new Unchecked_Conversion ++ (unsigned_long, pthread_t); ++ ++ type pthread_mutex_t is limited private; ++ type pthread_cond_t is limited private; ++ type pthread_attr_t is limited private; ++ type pthread_mutexattr_t is limited private; ++ type pthread_condattr_t is limited private; ++ type pthread_key_t is private; ++ ++ -- From /usr/include/pthread/pthreadtypes.h ++ PTHREAD_CREATE_DETACHED : constant := 1; ++ PTHREAD_CREATE_JOINABLE : constant := 0; ++ ++ PTHREAD_SCOPE_PROCESS : constant := 1; ++ PTHREAD_SCOPE_SYSTEM : constant := 0; ++ ++ ----------- ++ -- Stack -- ++ ----------- ++ ++ -- From: /usr/include/i386-gnu/bits/sigstack.h ++ type stack_t is record ++ ss_sp : System.Address; ++ ss_size : size_t; ++ ss_flags : int; ++ end record; ++ pragma Convention (C, stack_t); ++ ++ function sigaltstack ++ (ss : not null access stack_t; ++ oss : access stack_t) return int; ++ pragma Import (C, sigaltstack, "sigaltstack"); ++ ++ Alternate_Stack : aliased System.Address; ++ -- This is a dummy definition, never used (Alternate_Stack_Size is null) ++ ++ Alternate_Stack_Size : constant := 0; ++ -- No alternate signal stack is used on this platform ++ ++ Stack_Base_Available : constant Boolean := False; ++ -- Indicates whether the stack base is available on this target ++ ++ function Get_Stack_Base (thread : pthread_t) return Address; ++ pragma Inline (Get_Stack_Base); ++ -- returns the stack base of the 
specified thread. Only call this function ++ -- when Stack_Base_Available is True. ++ ++ -- From: /usr/include/i386-gnu/bits/shm.h __getpagesize or getpagesize?? ++ function Get_Page_Size return size_t; ++ function Get_Page_Size return Address; ++ pragma Import (C, Get_Page_Size, "__getpagesize"); ++ -- Returns the size of a page ++ ++ -- From /usr/include/i386-gnu/bits/mman.h ++ PROT_NONE : constant := 0; ++ PROT_READ : constant := 4; ++ PROT_WRITE : constant := 2; ++ PROT_EXEC : constant := 1; ++ PROT_ALL : constant := PROT_READ + PROT_WRITE + PROT_EXEC; ++ PROT_ON : constant := PROT_NONE; ++ PROT_OFF : constant := PROT_ALL; ++ ++ -- From /usr/include/i386-gnu/bits/mman.h ++ function mprotect (addr : Address; len : size_t; prot : int) return int; ++ pragma Import (C, mprotect); ++ ++ --------------------------------------- ++ -- Nonstandard Thread Initialization -- ++ --------------------------------------- ++ ++ procedure pthread_init; ++ pragma Inline (pthread_init); ++ -- This is a dummy procedure to share some GNULLI files ++ ++ ------------------------- ++ -- POSIX.1c Section 3 -- ++ ------------------------- ++ ++ -- From: /usr/include/signal.h: ++ -- sigwait (__const sigset_t *__restrict __set, int *__restrict __sig) ++ function sigwait (set : access sigset_t; sig : access Signal) return int; ++ pragma Import (C, sigwait, "sigwait"); ++ ++ -- From: /usr/include/pthread/pthread.h: ++ -- extern int pthread_kill (pthread_t thread, int signo); ++ function pthread_kill (thread : pthread_t; sig : Signal) return int; ++ pragma Import (C, pthread_kill, "pthread_kill"); ++ ++ -- From: /usr/include/i386-gnu/bits/sigthread.h ++ -- extern int pthread_sigmask (int __how, __const __sigset_t *__newmask, ++ -- __sigset_t *__oldmask) __THROW; ++ function pthread_sigmask ++ (how : int; ++ set : access sigset_t; ++ oset : access sigset_t) return int; ++ pragma Import (C, pthread_sigmask, "pthread_sigmask"); ++ ++ -------------------------- ++ -- POSIX.1c Section 11 -- ++ 
-------------------------- ++ ++ -- From: /usr/include/pthread/pthread.h and ++ -- /usr/include/pthread/pthreadtypes.h ++ function pthread_mutexattr_init ++ (attr : access pthread_mutexattr_t) return int; ++ pragma Import (C, pthread_mutexattr_init, "pthread_mutexattr_init"); ++ ++ function pthread_mutexattr_destroy ++ (attr : access pthread_mutexattr_t) return int; ++ pragma Import (C, pthread_mutexattr_destroy, "pthread_mutexattr_destroy"); ++ ++ function pthread_mutex_init ++ (mutex : access pthread_mutex_t; ++ attr : access pthread_mutexattr_t) return int; ++ pragma Import (C, pthread_mutex_init, "pthread_mutex_init"); ++ ++ function pthread_mutex_destroy (mutex : access pthread_mutex_t) return int; ++ pragma Import (C, pthread_mutex_destroy, "pthread_mutex_destroy"); ++ ++ function pthread_mutex_lock (mutex : access pthread_mutex_t) return int; ++ pragma Import (C, pthread_mutex_lock, "pthread_mutex_lock"); ++ ++ function pthread_mutex_unlock (mutex : access pthread_mutex_t) return int; ++ pragma Import (C, pthread_mutex_unlock, "pthread_mutex_unlock"); ++ ++ function pthread_condattr_init ++ (attr : access pthread_condattr_t) return int; ++ pragma Import (C, pthread_condattr_init, "pthread_condattr_init"); ++ ++ function pthread_condattr_destroy ++ (attr : access pthread_condattr_t) return int; ++ pragma Import (C, pthread_condattr_destroy, "pthread_condattr_destroy"); ++ ++ function pthread_cond_init ++ (cond : access pthread_cond_t; ++ attr : access pthread_condattr_t) return int; ++ pragma Import (C, pthread_cond_init, "pthread_cond_init"); ++ ++ function pthread_cond_destroy (cond : access pthread_cond_t) return int; ++ pragma Import (C, pthread_cond_destroy, "pthread_cond_destroy"); ++ ++ function pthread_cond_signal (cond : access pthread_cond_t) return int; ++ pragma Import (C, pthread_cond_signal, "pthread_cond_signal"); ++ ++ function pthread_cond_wait ++ (cond : access pthread_cond_t; ++ mutex : access pthread_mutex_t) return int; ++ pragma Import 
(C, pthread_cond_wait, "pthread_cond_wait"); ++ ++ function pthread_cond_timedwait ++ (cond : access pthread_cond_t; ++ mutex : access pthread_mutex_t; ++ abstime : access timespec) return int; ++ pragma Import (C, pthread_cond_timedwait, "pthread_cond_timedwait"); ++ ++ Relative_Timed_Wait : constant Boolean := False; ++ -- pthread_cond_timedwait requires an absolute delay time ++ ++ -------------------------- ++ -- POSIX.1c Section 13 -- ++ -------------------------- ++ -- From /usr/include/pthread/pthreadtypes.h ++ ++ PTHREAD_PRIO_NONE : constant := 0; ++ PTHREAD_PRIO_PROTECT : constant := 2; ++ PTHREAD_PRIO_INHERIT : constant := 1; ++ ++ -- From: /usr/include/pthread/pthread.h ++ function pthread_mutexattr_setprotocol ++ (attr : access pthread_mutexattr_t; ++ protocol : int) return int; ++ pragma Import (C, pthread_mutexattr_setprotocol, ++ "pthread_mutexattr_setprotocol"); ++ ++ function pthread_mutexattr_getprotocol ++ (attr : access pthread_mutexattr_t; ++ protocol : access int) return int; ++ pragma Import (C, pthread_mutexattr_getprotocol, ++ "pthread_mutexattr_getprotocol"); ++ ++ function pthread_mutexattr_setprioceiling ++ (attr : access pthread_mutexattr_t; ++ prioceiling : int) return int; ++ pragma Import (C, pthread_mutexattr_setprioceiling, ++ "pthread_mutexattr_setprioceiling"); ++ ++ function pthread_mutexattr_getprioceiling ++ (attr : access pthread_mutexattr_t; ++ prioceiling : access int) return int; ++ pragma Import (C, pthread_mutexattr_getprioceiling, ++ "pthread_mutexattr_getprioceiling"); ++ ++ type struct_sched_param is record ++ sched_priority : int; -- scheduling priority ++ end record; ++ pragma Convention (C, struct_sched_param); ++ ++ function pthread_setschedparam ++ (thread : pthread_t; ++ policy : int; ++ param : access struct_sched_param) return int; ++ pragma Import (C, pthread_setschedparam, "pthread_setschedparam"); ++ ++ function pthread_attr_setscope ++ (attr : access pthread_attr_t; ++ contentionscope : int) return int; ++ 
pragma Import (C, pthread_attr_setscope, "pthread_attr_setscope"); ++ ++ function pthread_attr_getscope ++ (attr : access pthread_attr_t; ++ contentionscope : access int) return int; ++ pragma Import (C, pthread_attr_getscope, "pthread_attr_getscope"); ++ ++ function pthread_attr_setinheritsched ++ (attr : access pthread_attr_t; ++ inheritsched : int) return int; ++ pragma Import (C, pthread_attr_setinheritsched, ++ "pthread_attr_setinheritsched"); ++ ++ function pthread_attr_getinheritsched ++ (attr : access pthread_attr_t; ++ inheritsched : access int) return int; ++ pragma Import (C, pthread_attr_getinheritsched, ++ "pthread_attr_getinheritsched"); ++ ++ function pthread_attr_setschedpolicy ++ (attr : access pthread_attr_t; ++ policy : int) return int; ++ pragma Import (C, pthread_attr_setschedpolicy, "pthread_setschedpolicy"); ++ ++ function sched_yield return int; ++ pragma Import (C, sched_yield, "sched_yield"); ++ ++ --------------------------- ++ -- P1003.1c - Section 16 -- ++ --------------------------- ++ ++ function pthread_attr_init ++ (attributes : access pthread_attr_t) return int; ++ pragma Import (C, pthread_attr_init, "pthread_attr_init"); ++ ++ function pthread_attr_destroy ++ (attributes : access pthread_attr_t) return int; ++ pragma Import (C, pthread_attr_destroy, "pthread_attr_destroy"); ++ ++ function pthread_attr_setdetachstate ++ (attr : access pthread_attr_t; ++ detachstate : int) return int; ++ pragma Import ++ (C, pthread_attr_setdetachstate, "pthread_attr_setdetachstate"); ++ ++ function pthread_attr_setstacksize ++ (attr : access pthread_attr_t; ++ stacksize : size_t) return int; ++ pragma Import (C, pthread_attr_setstacksize, "pthread_attr_setstacksize"); ++ ++ -- From: /usr/include/pthread/pthread.h ++ function pthread_create ++ (thread : access pthread_t; ++ attributes : access pthread_attr_t; ++ start_routine : Thread_Body; ++ arg : System.Address) return int; ++ pragma Import (C, pthread_create, "pthread_create"); ++ ++ procedure 
pthread_exit (status : System.Address); ++ pragma Import (C, pthread_exit, "pthread_exit"); ++ ++ function pthread_self return pthread_t; ++ pragma Import (C, pthread_self, "pthread_self"); ++ ++ -------------------------- ++ -- POSIX.1c Section 17 -- ++ -------------------------- ++ ++ function pthread_setspecific ++ (key : pthread_key_t; ++ value : System.Address) return int; ++ pragma Import (C, pthread_setspecific, "pthread_setspecific"); ++ ++ function pthread_getspecific (key : pthread_key_t) return System.Address; ++ pragma Import (C, pthread_getspecific, "pthread_getspecific"); ++ ++ type destructor_pointer is access procedure (arg : System.Address); ++ pragma Convention (C, destructor_pointer); ++ ++ function pthread_key_create ++ (key : access pthread_key_t; ++ destructor : destructor_pointer) return int; ++ pragma Import (C, pthread_key_create, "pthread_key_create"); ++ ++ -- From /usr/include/i386-gnu/bits/sched.h ++ -- 1_024 == 1024?? ++ CPU_SETSIZE : constant := 1_024; ++ ++ type bit_field is array (1 .. CPU_SETSIZE) of Boolean; ++ for bit_field'Size use CPU_SETSIZE; ++ pragma Pack (bit_field); ++ pragma Convention (C, bit_field); ++ ++ type cpu_set_t is record ++ bits : bit_field; ++ end record; ++ pragma Convention (C, cpu_set_t); ++ ++ -- function pthread_setaffinity_np ++ -- (thread : pthread_t; ++ -- cpusetsize : size_t; ++ -- cpuset : access cpu_set_t) return int; ++ -- pragma Import (C, pthread_setaffinity_np, ++ -- "__gnat_pthread_setaffinity_np"); ++ ++private ++ ++ type sigset_t is array (1 .. 
4) of unsigned; ++ ++ -- FIXME: ++ -- In GNU/Hurd the component sa_handler turns out to ++ -- be a union type, and the selector is a macro: ++ -- #define sa_handler __sigaction_handler.sa_handler ++ -- #define sa_sigaction __sigaction_handler.sa_sigaction ++ ++ -- In FreeBSD the component sa_handler turns out to ++ -- be a union type, and the selector is a macro: ++ -- #define sa_handler __sigaction_u._handler ++ -- #define sa_sigaction __sigaction_u._sigaction ++ ++ -- Should we add a signal_context type here ? ++ -- How could it be done independent of the CPU architecture ? ++ -- sigcontext type is opaque, so it is architecturally neutral. ++ -- It is always passed as an access type, so define it as an empty record ++ -- since the contents are not used anywhere. ++ type struct_sigcontext is null record; ++ pragma Convention (C, struct_sigcontext); ++ ++ type pid_t is new int; ++ ++ type time_t is new long; ++ ++ type timespec is record ++ tv_sec : time_t; ++ tv_nsec : long; ++ end record; ++ pragma Convention (C, timespec); ++ ++ type clockid_t is new int; ++ CLOCK_REALTIME : constant clockid_t := 0; ++ ++ -- From: /usr/include/pthread/pthreadtypes.h: ++ -- typedef struct __pthread_attr pthread_attr_t; ++ -- /usr/include/bits/thread-attr.h: struct __pthread_attr...
++ -- /usr/include/pthread/pthreadtypes.h: enum __pthread_contentionscope ++ -- enum __pthread_detachstate detachstate; ++ -- enum __pthread_inheritsched inheritsched; ++ -- enum __pthread_contentionscope contentionscope; ++ -- Not used: schedpolicy : int; ++ type pthread_attr_t is record ++ schedparam : struct_sched_param; ++ stackaddr : System.Address; ++ stacksize : size_t; ++ guardsize : size_t; ++ detachstate : int; ++ inheritsched : int; ++ contentionscope : int; ++ schedpolicy : int; ++ end record; ++ pragma Convention (C, pthread_attr_t); ++ ++ -- From: /usr/include/pthread/pthreadtypes.h: ++ -- typedef struct __pthread_condattr pthread_condattr_t; ++ -- From: /usr/include/bits/condition-attr.h: ++ -- struct __pthread_condattr { ++ -- enum __pthread_process_shared pshared; ++ -- __Clockid_T Clock;} ++ -- From: /usr/include/pthread/pthreadtypes.h: ++ -- enum __pthread_process_shared ++ type pthread_condattr_t is record ++ pshared : int; ++ clock : clockid_t; ++ end record; ++ pragma Convention (C, pthread_condattr_t); ++ ++ -- From: /usr/include/pthread/pthreadtypes.h: ++ -- typedef struct __pthread_mutexattr pthread_mutexattr_t; and ++ -- /usr/include/bits/mutex-attr.h ++ -- struct __pthread_mutexattr { ++ -- Int Prioceiling; ++ -- Enum __Pthread_Mutex_Protocol Protocol; ++ -- Enum __Pthread_Process_Shared Pshared; ++ -- Enum __Pthread_Mutex_Type Mutex_Type;}; ++ type pthread_mutexattr_t is record ++ prioceiling : int; ++ protocol : int; ++ pshared : int; ++ mutex_type : int; ++ end record; ++ pragma Convention (C, pthread_mutexattr_t); ++ ++ -- From: /usr/include/pthread/pthreadtypes.h ++ -- typedef struct __pthread_mutex pthread_mutex_t; and ++ -- /usr/include/bits/mutex.h: ++ -- struct __pthread_mutex { ++ -- __pthread_spinlock_t __held; ++ -- __pthread_spinlock_t __lock; ++ -- /* in cthreads, mutex_init does not initialized the third ++ -- pointer, as such, we cannot rely on its value for anything. 
*/ ++ -- char *cthreadscompat1; ++ -- struct __pthread *__queue; ++ -- struct __pthread_mutexattr *attr; ++ -- void *data; ++ -- /* up to this point, we are completely compatible with cthreads ++ -- and what libc expects. */ ++ -- void *owner; ++ -- unsigned locks; ++ -- /* if null then the default attributes apply. */ ++ -- }; ++ type pthread_mutex_t is record ++ held : int; ++ lock : int; ++ cthreadcompat : System.Address; ++ queue : System.Address; ++ attr : System.Address; ++ data : System.Address; ++ owner : System.Address; ++ locks : unsigned; ++ end record; ++ pragma Convention (C, pthread_mutex_t); ++ -- pointer needed? ++ -- type pthread_mutex_t_ptr is access pthread_mutex_t; ++ ++ -- From: /usr/include/pthread/pthreadtypes.h: ++ -- typedef struct __pthread_cond pthread_cond_t; ++ -- typedef struct __pthread_condattr pthread_condattr_t; ++ -- /usr/include/bits/condition.h:struct __pthread_cond{} ++ -- pthread_condattr_t: see above! ++ -- /usr/include/bits/condition.h: struct __pthread_condimpl *__impl; ++ ++ type pthread_cond_t is record ++ lock : int; ++ queue : System.Address; ++ condattr : System.Address; ++ impl : System.Address; ++ data : System.Address; ++ end record; ++ pragma Convention (C, pthread_cond_t); ++ ++ -- From: /usr/include/pthread/pthreadtypes.h: ++ -- typedef __pthread_key pthread_key_t; and ++ -- /usr/include/bits/thread-specific.h: ++ -- typedef int __pthread_key; ++ type pthread_key_t is new int; ++ ++end System.OS_Interface; diff -Nru gcc-4.6-4.6.2/debian/patches/ada-s-taprop-gnu.adb.diff gcc-4.6-4.6.4/debian/patches/ada-s-taprop-gnu.adb.diff --- gcc-4.6-4.6.2/debian/patches/ada-s-taprop-gnu.adb.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/ada-s-taprop-gnu.adb.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,1339 @@ +--- /dev/null 2012-01-30 20:41:15.189616186 +0100 ++++ b/src/gcc/ada/s-taprop-gnu.adb 2012-04-11 19:17:52.000000000 +0200 +@@ -0,0 +1,1336 @@ 
++------------------------------------------------------------------------------ ++-- -- ++-- GNAT RUN-TIME LIBRARY (GNARL) COMPONENTS -- ++-- -- ++-- S Y S T E M . T A S K _ P R I M I T I V E S . O P E R A T I O N S -- ++-- -- ++-- B o d y -- ++-- -- ++-- Copyright (C) 1992-2009, Free Software Foundation, Inc. -- ++-- -- ++-- GNARL is free software; you can redistribute it and/or modify it under -- ++-- terms of the GNU General Public License as published by the Free Soft- -- ++-- ware Foundation; either version 3, or (at your option) any later ver- -- ++-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- ++-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- ++-- or FITNESS FOR A PARTICULAR PURPOSE. -- ++-- -- ++-- As a special exception under Section 7 of GPL version 3, you are granted -- ++-- additional permissions described in the GCC Runtime Library Exception, -- ++-- version 3.1, as published by the Free Software Foundation. -- ++-- -- ++-- You should have received a copy of the GNU General Public License and -- ++-- a copy of the GCC Runtime Library Exception along with this program; -- ++-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- ++-- . -- ++-- -- ++-- GNARL was developed by the GNARL team at Florida State University. -- ++-- Extensive contributions were provided by Ada Core Technologies, Inc. -- ++-- -- ++------------------------------------------------------------------------------ ++ ++-- This is a GNU/Hurd version of this package ++-- Note: Removed the SCHED_FIFO and Ceiling Locking from the posix version ++-- since these functions are not (yet) supported on GNU/Hurd ++ ++-- This package contains all the GNULL primitives that interface directly with ++-- the underlying OS. ++ ++pragma Polling (Off); ++-- Turn off polling, we do not want ATC polling to take place during tasking ++-- operations. It causes infinite loops and other problems. 
++ ++with Ada.Unchecked_Conversion; ++with Ada.Unchecked_Deallocation; ++ ++with Interfaces.C; ++ ++with System.Tasking.Debug; ++with System.Interrupt_Management; ++with System.OS_Primitives; ++with System.Task_Info; ++ ++with System.Soft_Links; ++-- We use System.Soft_Links instead of System.Tasking.Initialization ++-- because the later is a higher level package that we shouldn't depend on. ++-- For example when using the restricted run time, it is replaced by ++-- System.Tasking.Restricted.Stages. ++ ++package body System.Task_Primitives.Operations is ++ ++ package SSL renames System.Soft_Links; ++ ++ use System.Tasking.Debug; ++ use System.Tasking; ++ use Interfaces.C; ++ use System.OS_Interface; ++ use System.Parameters; ++ use System.OS_Primitives; ++ ++ ---------------- ++ -- Local Data -- ++ ---------------- ++ ++ -- The followings are logically constants, but need to be initialized ++ -- at run time. ++ ++ Single_RTS_Lock : aliased RTS_Lock; ++ -- This is a lock to allow only one thread of control in the RTS at ++ -- a time; it is used to execute in mutual exclusion from all other tasks. ++ -- Used mainly in Single_Lock mode, but also to protect All_Tasks_List ++ ++ ATCB_Key : aliased pthread_key_t; ++ -- Key used to find the Ada Task_Id associated with a thread ++ ++ Environment_Task_Id : Task_Id; ++ -- A variable to hold Task_Id for the environment task ++ ++ Unblocked_Signal_Mask : aliased sigset_t; ++ -- The set of signals that should unblocked in all tasks ++ ++ -- The followings are internal configuration constants needed ++ ++ Next_Serial_Number : Task_Serial_Number := 100; ++ -- We start at 100, to reserve some special values for ++ -- using in error checking. 
++ ++ Foreign_Task_Elaborated : aliased Boolean := True; ++ -- Used to identified fake tasks (i.e., non-Ada Threads) ++ ++ Use_Alternate_Stack : constant Boolean := Alternate_Stack_Size /= 0; ++ -- Whether to use an alternate signal stack for stack overflows ++ ++ Abort_Handler_Installed : Boolean := False; ++ -- True if a handler for the abort signal is installed ++ ++ -------------------- ++ -- Local Packages -- ++ -------------------- ++ ++ package Specific is ++ ++ procedure Initialize (Environment_Task : Task_Id); ++ pragma Inline (Initialize); ++ -- Initialize various data needed by this package ++ ++ function Is_Valid_Task return Boolean; ++ pragma Inline (Is_Valid_Task); ++ -- Does executing thread have a TCB? ++ ++ procedure Set (Self_Id : Task_Id); ++ pragma Inline (Set); ++ -- Set the self id for the current task ++ ++ function Self return Task_Id; ++ pragma Inline (Self); ++ -- Return a pointer to the Ada Task Control Block of the calling task ++ ++ end Specific; ++ ++ package body Specific is separate; ++ -- The body of this package is target specific ++ ++ --------------------------------- ++ -- Support for foreign threads -- ++ --------------------------------- ++ ++ function Register_Foreign_Thread (Thread : Thread_Id) return Task_Id; ++ -- Allocate and Initialize a new ATCB for the current Thread ++ ++ function Register_Foreign_Thread ++ (Thread : Thread_Id) return Task_Id is separate; ++ ++ ----------------------- ++ -- Local Subprograms -- ++ ----------------------- ++ ++ procedure Abort_Handler (Sig : Signal); ++ -- Signal handler used to implement asynchronous abort. ++ -- See also comment before body, below. ++ ++ function To_Address is ++ new Ada.Unchecked_Conversion (Task_Id, System.Address); ++ ++ ------------------- ++ -- Abort_Handler -- ++ ------------------- ++ ++ -- Target-dependent binding of inter-thread Abort signal to the raising of ++ -- the Abort_Signal exception. 
++ ++ -- The technical issues and alternatives here are essentially the ++ -- same as for raising exceptions in response to other signals ++ -- (e.g. Storage_Error). See code and comments in the package body ++ -- System.Interrupt_Management. ++ ++ -- Some implementations may not allow an exception to be propagated out of ++ -- a handler, and others might leave the signal or interrupt that invoked ++ -- this handler masked after the exceptional return to the application ++ -- code. ++ ++ -- GNAT exceptions are originally implemented using setjmp()/longjmp(). On ++ -- most UNIX systems, this will allow transfer out of a signal handler, ++ -- which is usually the only mechanism available for implementing ++ -- asynchronous handlers of this kind. However, some systems do not ++ -- restore the signal mask on longjmp(), leaving the abort signal masked. ++ ++ procedure Abort_Handler (Sig : Signal) is ++ pragma Unreferenced (Sig); ++ ++ T : constant Task_Id := Self; ++ Old_Set : aliased sigset_t; ++ ++ Result : Interfaces.C.int; ++ pragma Warnings (Off, Result); ++ ++ begin ++ -- It's not safe to raise an exception when using GCC ZCX mechanism. ++ -- Note that we still need to install a signal handler, since in some ++ -- cases (e.g. shutdown of the Server_Task in System.Interrupts) we ++ -- need to send the Abort signal to a task. 
++ ++ if ZCX_By_Default and then GCC_ZCX_Support then ++ return; ++ end if; ++ ++ if T.Deferral_Level = 0 ++ and then T.Pending_ATC_Level < T.ATC_Nesting_Level and then ++ not T.Aborting ++ then ++ T.Aborting := True; ++ ++ -- Make sure signals used for RTS internal purpose are unmasked ++ ++ Result := pthread_sigmask (SIG_UNBLOCK, ++ Unblocked_Signal_Mask'Access, Old_Set'Access); ++ pragma Assert (Result = 0); ++ ++ raise Standard'Abort_Signal; ++ end if; ++ end Abort_Handler; ++ ++ ----------------- ++ -- Stack_Guard -- ++ ----------------- ++ ++ procedure Stack_Guard (T : ST.Task_Id; On : Boolean) is ++ Stack_Base : constant Address := Get_Stack_Base (T.Common.LL.Thread); ++ Guard_Page_Address : Address; ++ ++ Res : Interfaces.C.int; ++ ++ begin ++ if Stack_Base_Available then ++ ++ -- Compute the guard page address ++ ++ Guard_Page_Address := ++ Stack_Base - (Stack_Base mod Get_Page_Size) + Get_Page_Size; ++ ++ Res := ++ mprotect (Guard_Page_Address, Get_Page_Size, ++ prot => (if On then PROT_ON else PROT_OFF)); ++ pragma Assert (Res = 0); ++ end if; ++ end Stack_Guard; ++ ++ -------------------- ++ -- Get_Thread_Id -- ++ -------------------- ++ ++ function Get_Thread_Id (T : ST.Task_Id) return OSI.Thread_Id is ++ begin ++ return T.Common.LL.Thread; ++ end Get_Thread_Id; ++ ++ ---------- ++ -- Self -- ++ ---------- ++ ++ function Self return Task_Id renames Specific.Self; ++ ++ --------------------- ++ -- Initialize_Lock -- ++ --------------------- ++ ++ -- Note: mutexes and cond_variables needed per-task basis are ++ -- initialized in Initialize_TCB and the Storage_Error is ++ -- handled. Other mutexes (such as RTS_Lock, Memory_Lock...) ++ -- used in RTS is initialized before any status change of RTS. ++ -- Therefore raising Storage_Error in the following routines ++ -- should be able to be handled safely. 
++ ++ procedure Initialize_Lock ++ (Prio : System.Any_Priority; ++ L : not null access Lock) ++ is ++ pragma Unreferenced (Prio); ++ ++ Attributes : aliased pthread_mutexattr_t; ++ Result : Interfaces.C.int; ++ ++ begin ++ Result := pthread_mutexattr_init (Attributes'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result = ENOMEM then ++ raise Storage_Error with "Failed to allocate a lock"; ++ end if; ++ ++ Result := pthread_mutex_init (L, Attributes'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result = ENOMEM then ++ Result := pthread_mutexattr_destroy (Attributes'Access); ++ raise Storage_Error; ++ end if; ++ ++ Result := pthread_mutexattr_destroy (Attributes'Access); ++ pragma Assert (Result = 0); ++ end Initialize_Lock; ++ ++ procedure Initialize_Lock ++ (L : not null access RTS_Lock; Level : Lock_Level) ++ is ++ pragma Unreferenced (Level); ++ ++ Attributes : aliased pthread_mutexattr_t; ++ Result : Interfaces.C.int; ++ ++ begin ++ Result := pthread_mutexattr_init (Attributes'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result = ENOMEM then ++ raise Storage_Error with "Failed to allocate a lock"; ++ end if; ++ ++ Result := pthread_mutex_init (L, Attributes'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result = ENOMEM then ++ Result := pthread_mutexattr_destroy (Attributes'Access); ++ raise Storage_Error; ++ end if; ++ ++ Result := pthread_mutexattr_destroy (Attributes'Access); ++ pragma Assert (Result = 0); ++ end Initialize_Lock; ++ ++ ------------------- ++ -- Finalize_Lock -- ++ ------------------- ++ ++ procedure Finalize_Lock (L : not null access Lock) is ++ Result : Interfaces.C.int; ++ begin ++ Result := pthread_mutex_destroy (L); ++ pragma Assert (Result = 0); ++ end Finalize_Lock; ++ ++ procedure Finalize_Lock (L : not null access RTS_Lock) is ++ Result : Interfaces.C.int; ++ begin ++ Result := pthread_mutex_destroy (L); ++ pragma Assert (Result 
= 0); ++ end Finalize_Lock; ++ ++ ---------------- ++ -- Write_Lock -- ++ ---------------- ++ ++ procedure Write_Lock ++ (L : not null access Lock; Ceiling_Violation : out Boolean) ++ is ++ Result : Interfaces.C.int; ++ ++ begin ++ Result := pthread_mutex_lock (L); ++ ++ -- Assume that the cause of EINVAL is a priority ceiling violation ++ ++ Ceiling_Violation := (Result = EINVAL); ++ pragma Assert (Result = 0 or else Result = EINVAL); ++ end Write_Lock; ++ ++ procedure Write_Lock ++ (L : not null access RTS_Lock; ++ Global_Lock : Boolean := False) ++ is ++ Result : Interfaces.C.int; ++ begin ++ if not Single_Lock or else Global_Lock then ++ Result := pthread_mutex_lock (L); ++ pragma Assert (Result = 0); ++ end if; ++ end Write_Lock; ++ ++ procedure Write_Lock (T : Task_Id) is ++ Result : Interfaces.C.int; ++ begin ++ if not Single_Lock then ++ Result := pthread_mutex_lock (T.Common.LL.L'Access); ++ pragma Assert (Result = 0); ++ end if; ++ end Write_Lock; ++ ++ --------------- ++ -- Read_Lock -- ++ --------------- ++ ++ procedure Read_Lock ++ (L : not null access Lock; Ceiling_Violation : out Boolean) is ++ begin ++ Write_Lock (L, Ceiling_Violation); ++ end Read_Lock; ++ ++ ------------ ++ -- Unlock -- ++ ------------ ++ ++ procedure Unlock (L : not null access Lock) is ++ Result : Interfaces.C.int; ++ begin ++ Result := pthread_mutex_unlock (L); ++ pragma Assert (Result = 0); ++ end Unlock; ++ ++ procedure Unlock ++ (L : not null access RTS_Lock; Global_Lock : Boolean := False) ++ is ++ Result : Interfaces.C.int; ++ begin ++ if not Single_Lock or else Global_Lock then ++ Result := pthread_mutex_unlock (L); ++ pragma Assert (Result = 0); ++ end if; ++ end Unlock; ++ ++ procedure Unlock (T : Task_Id) is ++ Result : Interfaces.C.int; ++ begin ++ if not Single_Lock then ++ Result := pthread_mutex_unlock (T.Common.LL.L'Access); ++ pragma Assert (Result = 0); ++ end if; ++ end Unlock; ++ ++ ----------------- ++ -- Set_Ceiling -- ++ ----------------- ++ ++ -- Dynamic 
priority ceilings are not supported by the underlying system ++ ++ procedure Set_Ceiling ++ (L : not null access Lock; ++ Prio : System.Any_Priority) ++ is ++ pragma Unreferenced (L, Prio); ++ begin ++ null; ++ end Set_Ceiling; ++ ++ ----------- ++ -- Sleep -- ++ ----------- ++ ++ procedure Sleep ++ (Self_ID : Task_Id; ++ Reason : System.Tasking.Task_States) ++ is ++ pragma Unreferenced (Reason); ++ ++ Result : Interfaces.C.int; ++ ++ begin ++ Result := ++ pthread_cond_wait ++ (cond => Self_ID.Common.LL.CV'Access, ++ mutex => (if Single_Lock ++ then Single_RTS_Lock'Access ++ else Self_ID.Common.LL.L'Access)); ++ ++ -- EINTR is not considered a failure ++ ++ pragma Assert (Result = 0 or else Result = EINTR); ++ end Sleep; ++ ++ ----------------- ++ -- Timed_Sleep -- ++ ----------------- ++ ++ -- This is for use within the run-time system, so abort is ++ -- assumed to be already deferred, and the caller should be ++ -- holding its own ATCB lock. ++ ++ procedure Timed_Sleep ++ (Self_ID : Task_Id; ++ Time : Duration; ++ Mode : ST.Delay_Modes; ++ Reason : Task_States; ++ Timedout : out Boolean; ++ Yielded : out Boolean) ++ is ++ pragma Unreferenced (Reason); ++ ++ Base_Time : constant Duration := Monotonic_Clock; ++ Check_Time : Duration := Base_Time; ++ Rel_Time : Duration; ++ Abs_Time : Duration; ++ Request : aliased timespec; ++ Result : Interfaces.C.int; ++ ++ begin ++ Timedout := True; ++ Yielded := False; ++ ++ if Mode = Relative then ++ Abs_Time := Duration'Min (Time, Max_Sensible_Delay) + Check_Time; ++ ++ if Relative_Timed_Wait then ++ Rel_Time := Duration'Min (Max_Sensible_Delay, Time); ++ end if; ++ ++ else ++ Abs_Time := Duration'Min (Check_Time + Max_Sensible_Delay, Time); ++ ++ if Relative_Timed_Wait then ++ Rel_Time := Duration'Min (Max_Sensible_Delay, Time - Check_Time); ++ end if; ++ end if; ++ ++ if Abs_Time > Check_Time then ++ Request := ++ To_Timespec (if Relative_Timed_Wait then Rel_Time else Abs_Time); ++ ++ loop ++ exit when 
Self_ID.Pending_ATC_Level < Self_ID.ATC_Nesting_Level; ++ ++ Result := ++ pthread_cond_timedwait ++ (cond => Self_ID.Common.LL.CV'Access, ++ mutex => (if Single_Lock ++ then Single_RTS_Lock'Access ++ else Self_ID.Common.LL.L'Access), ++ abstime => Request'Access); ++ ++ Check_Time := Monotonic_Clock; ++ exit when Abs_Time <= Check_Time or else Check_Time < Base_Time; ++ ++ if Result = 0 or Result = EINTR then ++ ++ -- Somebody may have called Wakeup for us ++ ++ Timedout := False; ++ exit; ++ end if; ++ ++ pragma Assert (Result = ETIMEDOUT); ++ end loop; ++ end if; ++ end Timed_Sleep; ++ ++ ----------------- ++ -- Timed_Delay -- ++ ----------------- ++ ++ -- This is for use in implementing delay statements, so we assume the ++ -- caller is abort-deferred but is holding no locks. ++ ++ procedure Timed_Delay ++ (Self_ID : Task_Id; ++ Time : Duration; ++ Mode : ST.Delay_Modes) ++ is ++ Base_Time : constant Duration := Monotonic_Clock; ++ Check_Time : Duration := Base_Time; ++ Abs_Time : Duration; ++ Rel_Time : Duration; ++ Request : aliased timespec; ++ ++ Result : Interfaces.C.int; ++ pragma Warnings (Off, Result); ++ ++ begin ++ if Single_Lock then ++ Lock_RTS; ++ end if; ++ ++ Write_Lock (Self_ID); ++ ++ if Mode = Relative then ++ Abs_Time := Duration'Min (Time, Max_Sensible_Delay) + Check_Time; ++ ++ if Relative_Timed_Wait then ++ Rel_Time := Duration'Min (Max_Sensible_Delay, Time); ++ end if; ++ ++ else ++ Abs_Time := Duration'Min (Check_Time + Max_Sensible_Delay, Time); ++ ++ if Relative_Timed_Wait then ++ Rel_Time := Duration'Min (Max_Sensible_Delay, Time - Check_Time); ++ end if; ++ end if; ++ ++ if Abs_Time > Check_Time then ++ Request := ++ To_Timespec (if Relative_Timed_Wait then Rel_Time else Abs_Time); ++ Self_ID.Common.State := Delay_Sleep; ++ ++ loop ++ exit when Self_ID.Pending_ATC_Level < Self_ID.ATC_Nesting_Level; ++ ++ Result := ++ pthread_cond_timedwait ++ (cond => Self_ID.Common.LL.CV'Access, ++ mutex => (if Single_Lock ++ then 
Single_RTS_Lock'Access ++ else Self_ID.Common.LL.L'Access), ++ abstime => Request'Access); ++ ++ Check_Time := Monotonic_Clock; ++ exit when Abs_Time <= Check_Time or else Check_Time < Base_Time; ++ ++ pragma Assert (Result = 0 ++ or else Result = ETIMEDOUT ++ or else Result = EINTR); ++ end loop; ++ ++ Self_ID.Common.State := Runnable; ++ end if; ++ ++ Unlock (Self_ID); ++ ++ if Single_Lock then ++ Unlock_RTS; ++ end if; ++ ++ Result := sched_yield; ++ end Timed_Delay; ++ ++ --------------------- ++ -- Monotonic_Clock -- ++ --------------------- ++ ++ function Monotonic_Clock return Duration is ++ TS : aliased timespec; ++ Result : Interfaces.C.int; ++ begin ++ Result := clock_gettime ++ (clock_id => CLOCK_REALTIME, tp => TS'Unchecked_Access); ++ pragma Assert (Result = 0); ++ return To_Duration (TS); ++ end Monotonic_Clock; ++ ++ ------------------- ++ -- RT_Resolution -- ++ ------------------- ++ ++ function RT_Resolution return Duration is ++ begin ++ return 10#1.0#E-6; ++ end RT_Resolution; ++ ++ ------------ ++ -- Wakeup -- ++ ------------ ++ ++ procedure Wakeup (T : Task_Id; Reason : System.Tasking.Task_States) is ++ pragma Unreferenced (Reason); ++ Result : Interfaces.C.int; ++ begin ++ Result := pthread_cond_signal (T.Common.LL.CV'Access); ++ pragma Assert (Result = 0); ++ end Wakeup; ++ ++ ----------- ++ -- Yield -- ++ ----------- ++ ++ procedure Yield (Do_Yield : Boolean := True) is ++ Result : Interfaces.C.int; ++ pragma Unreferenced (Result); ++ begin ++ if Do_Yield then ++ Result := sched_yield; ++ end if; ++ end Yield; ++ ++ ------------------ ++ -- Set_Priority -- ++ ------------------ ++ ++ procedure Set_Priority ++ (T : Task_Id; ++ Prio : System.Any_Priority; ++ Loss_Of_Inheritance : Boolean := False) ++ is ++ pragma Unreferenced (Loss_Of_Inheritance); ++ ++ begin ++ null; ++ end Set_Priority; ++ ++ ------------------ ++ -- Get_Priority -- ++ ------------------ ++ ++ function Get_Priority (T : Task_Id) return System.Any_Priority is ++ begin ++ 
return T.Common.Current_Priority; ++ end Get_Priority; ++ ++ ---------------- ++ -- Enter_Task -- ++ ---------------- ++ ++ procedure Enter_Task (Self_ID : Task_Id) is ++ begin ++ Self_ID.Common.LL.Thread := pthread_self; ++ Self_ID.Common.LL.LWP := lwp_self; ++ ++ Specific.Set (Self_ID); ++ ++ if Use_Alternate_Stack then ++ declare ++ Stack : aliased stack_t; ++ Result : Interfaces.C.int; ++ begin ++ Stack.ss_sp := Self_ID.Common.Task_Alternate_Stack; ++ Stack.ss_size := Alternate_Stack_Size; ++ Stack.ss_flags := 0; ++ Result := sigaltstack (Stack'Access, null); ++ pragma Assert (Result = 0); ++ end; ++ end if; ++ end Enter_Task; ++ ++ -------------- ++ -- New_ATCB -- ++ -------------- ++ ++ function New_ATCB (Entry_Num : Task_Entry_Index) return Task_Id is ++ begin ++ return new Ada_Task_Control_Block (Entry_Num); ++ end New_ATCB; ++ ++ ------------------- ++ -- Is_Valid_Task -- ++ ------------------- ++ ++ function Is_Valid_Task return Boolean renames Specific.Is_Valid_Task; ++ ++ ----------------------------- ++ -- Register_Foreign_Thread -- ++ ----------------------------- ++ ++ function Register_Foreign_Thread return Task_Id is ++ begin ++ if Is_Valid_Task then ++ return Self; ++ else ++ return Register_Foreign_Thread (pthread_self); ++ end if; ++ end Register_Foreign_Thread; ++ ++ -------------------- ++ -- Initialize_TCB -- ++ -------------------- ++ ++ procedure Initialize_TCB (Self_ID : Task_Id; Succeeded : out Boolean) is ++ Mutex_Attr : aliased pthread_mutexattr_t; ++ Result : Interfaces.C.int; ++ Cond_Attr : aliased pthread_condattr_t; ++ ++ begin ++ -- Give the task a unique serial number ++ ++ Self_ID.Serial_Number := Next_Serial_Number; ++ Next_Serial_Number := Next_Serial_Number + 1; ++ pragma Assert (Next_Serial_Number /= 0); ++ ++ if not Single_Lock then ++ Result := pthread_mutexattr_init (Mutex_Attr'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result = 0 then ++ Result := ++ pthread_mutex_init ++ 
(Self_ID.Common.LL.L'Access, ++ Mutex_Attr'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ end if; ++ ++ if Result /= 0 then ++ Succeeded := False; ++ return; ++ end if; ++ ++ Result := pthread_mutexattr_destroy (Mutex_Attr'Access); ++ pragma Assert (Result = 0); ++ end if; ++ ++ Result := pthread_condattr_init (Cond_Attr'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result = 0 then ++ Result := ++ pthread_cond_init ++ (Self_ID.Common.LL.CV'Access, Cond_Attr'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ end if; ++ ++ if Result = 0 then ++ Succeeded := True; ++ else ++ if not Single_Lock then ++ Result := pthread_mutex_destroy (Self_ID.Common.LL.L'Access); ++ pragma Assert (Result = 0); ++ end if; ++ ++ Succeeded := False; ++ end if; ++ ++ Result := pthread_condattr_destroy (Cond_Attr'Access); ++ pragma Assert (Result = 0); ++ end Initialize_TCB; ++ ++ ----------------- ++ -- Create_Task -- ++ ----------------- ++ ++ procedure Create_Task ++ (T : Task_Id; ++ Wrapper : System.Address; ++ Stack_Size : System.Parameters.Size_Type; ++ Priority : System.Any_Priority; ++ Succeeded : out Boolean) ++ is ++ Attributes : aliased pthread_attr_t; ++ Adjusted_Stack_Size : Interfaces.C.size_t; ++ Page_Size : constant Interfaces.C.size_t := Get_Page_Size; ++ Result : Interfaces.C.int; ++ ++ function Thread_Body_Access is new ++ Ada.Unchecked_Conversion (System.Address, Thread_Body); ++ ++ use System.Task_Info; ++ ++ begin ++ Adjusted_Stack_Size := ++ Interfaces.C.size_t (Stack_Size + Alternate_Stack_Size); ++ ++ if Stack_Base_Available then ++ ++ -- If Stack Checking is supported then allocate 2 additional pages: ++ ++ -- In the worst case, stack is allocated at something like ++ -- N * Get_Page_Size - epsilon, we need to add the size for 2 pages ++ -- to be sure the effective stack size is greater than what ++ -- has been asked. 
++ ++ Adjusted_Stack_Size := Adjusted_Stack_Size + 2 * Page_Size; ++ end if; ++ ++ -- Round stack size as this is required by some OSes (Darwin) ++ ++ Adjusted_Stack_Size := Adjusted_Stack_Size + Page_Size - 1; ++ Adjusted_Stack_Size := ++ Adjusted_Stack_Size - Adjusted_Stack_Size mod Page_Size; ++ ++ Result := pthread_attr_init (Attributes'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result /= 0 then ++ Succeeded := False; ++ return; ++ end if; ++ ++ Result := ++ pthread_attr_setdetachstate ++ (Attributes'Access, PTHREAD_CREATE_DETACHED); ++ pragma Assert (Result = 0); ++ ++ Result := ++ pthread_attr_setstacksize ++ (Attributes'Access, Adjusted_Stack_Size); ++ pragma Assert (Result = 0); ++ ++ -- Since the initial signal mask of a thread is inherited from the ++ -- creator, and the Environment task has all its signals masked, we ++ -- do not need to manipulate caller's signal mask at this point. ++ -- All tasks in RTS will have All_Tasks_Mask initially. ++ ++ Result := pthread_create ++ (T.Common.LL.Thread'Access, ++ Attributes'Access, ++ Thread_Body_Access (Wrapper), ++ To_Address (T)); ++ pragma Assert (Result = 0 or else Result = EAGAIN); ++ ++ Succeeded := Result = 0; ++ ++ Result := pthread_attr_destroy (Attributes'Access); ++ pragma Assert (Result = 0); ++ ++ if Succeeded then ++ Set_Priority (T, Priority); ++ end if; ++ end Create_Task; ++ ++ ------------------ ++ -- Finalize_TCB -- ++ ------------------ ++ ++ procedure Finalize_TCB (T : Task_Id) is ++ Result : Interfaces.C.int; ++ Tmp : Task_Id := T; ++ Is_Self : constant Boolean := T = Self; ++ ++ procedure Free is new ++ Ada.Unchecked_Deallocation (Ada_Task_Control_Block, Task_Id); ++ ++ begin ++ if not Single_Lock then ++ Result := pthread_mutex_destroy (T.Common.LL.L'Access); ++ pragma Assert (Result = 0); ++ end if; ++ ++ Result := pthread_cond_destroy (T.Common.LL.CV'Access); ++ pragma Assert (Result = 0); ++ ++ if T.Known_Tasks_Index /= -1 then ++ Known_Tasks 
(T.Known_Tasks_Index) := null; ++ end if; ++ ++ Free (Tmp); ++ ++ if Is_Self then ++ Specific.Set (null); ++ end if; ++ end Finalize_TCB; ++ ++ --------------- ++ -- Exit_Task -- ++ --------------- ++ ++ procedure Exit_Task is ++ begin ++ -- Mark this task as unknown, so that if Self is called, it won't ++ -- return a dangling pointer. ++ ++ Specific.Set (null); ++ end Exit_Task; ++ ++ ---------------- ++ -- Abort_Task -- ++ ---------------- ++ ++ procedure Abort_Task (T : Task_Id) is ++ Result : Interfaces.C.int; ++ begin ++ if Abort_Handler_Installed then ++ Result := ++ pthread_kill ++ (T.Common.LL.Thread, ++ Signal (System.Interrupt_Management.Abort_Task_Interrupt)); ++ pragma Assert (Result = 0); ++ end if; ++ end Abort_Task; ++ ++ ---------------- ++ -- Initialize -- ++ ---------------- ++ ++ procedure Initialize (S : in out Suspension_Object) is ++ Mutex_Attr : aliased pthread_mutexattr_t; ++ Cond_Attr : aliased pthread_condattr_t; ++ Result : Interfaces.C.int; ++ ++ begin ++ -- Initialize internal state (always to False (RM D.10 (6))) ++ ++ S.State := False; ++ S.Waiting := False; ++ ++ -- Initialize internal mutex ++ ++ Result := pthread_mutexattr_init (Mutex_Attr'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result = ENOMEM then ++ raise Storage_Error; ++ end if; ++ ++ Result := pthread_mutex_init (S.L'Access, Mutex_Attr'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result = ENOMEM then ++ Result := pthread_mutexattr_destroy (Mutex_Attr'Access); ++ pragma Assert (Result = 0); ++ ++ raise Storage_Error; ++ end if; ++ ++ Result := pthread_mutexattr_destroy (Mutex_Attr'Access); ++ pragma Assert (Result = 0); ++ ++ -- Initialize internal condition variable ++ ++ Result := pthread_condattr_init (Cond_Attr'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result /= 0 then ++ Result := pthread_mutex_destroy (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ if Result = ENOMEM then ++ 
raise Storage_Error; ++ end if; ++ end if; ++ ++ Result := pthread_cond_init (S.CV'Access, Cond_Attr'Access); ++ pragma Assert (Result = 0 or else Result = ENOMEM); ++ ++ if Result /= 0 then ++ Result := pthread_mutex_destroy (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ if Result = ENOMEM then ++ Result := pthread_condattr_destroy (Cond_Attr'Access); ++ pragma Assert (Result = 0); ++ raise Storage_Error; ++ end if; ++ end if; ++ ++ Result := pthread_condattr_destroy (Cond_Attr'Access); ++ pragma Assert (Result = 0); ++ end Initialize; ++ ++ -------------- ++ -- Finalize -- ++ -------------- ++ ++ procedure Finalize (S : in out Suspension_Object) is ++ Result : Interfaces.C.int; ++ ++ begin ++ -- Destroy internal mutex ++ ++ Result := pthread_mutex_destroy (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ -- Destroy internal condition variable ++ ++ Result := pthread_cond_destroy (S.CV'Access); ++ pragma Assert (Result = 0); ++ end Finalize; ++ ++ ------------------- ++ -- Current_State -- ++ ------------------- ++ ++ function Current_State (S : Suspension_Object) return Boolean is ++ begin ++ -- We do not want to use lock on this read operation. State is marked ++ -- as Atomic so that we ensure that the value retrieved is correct. 
++ ++ return S.State; ++ end Current_State; ++ ++ --------------- ++ -- Set_False -- ++ --------------- ++ ++ procedure Set_False (S : in out Suspension_Object) is ++ Result : Interfaces.C.int; ++ ++ begin ++ SSL.Abort_Defer.all; ++ ++ Result := pthread_mutex_lock (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ S.State := False; ++ ++ Result := pthread_mutex_unlock (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ SSL.Abort_Undefer.all; ++ end Set_False; ++ ++ -------------- ++ -- Set_True -- ++ -------------- ++ ++ procedure Set_True (S : in out Suspension_Object) is ++ Result : Interfaces.C.int; ++ ++ begin ++ SSL.Abort_Defer.all; ++ ++ Result := pthread_mutex_lock (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ -- If there is already a task waiting on this suspension object then ++ -- we resume it, leaving the state of the suspension object to False, ++ -- as it is specified in (RM D.10(9)). Otherwise, it just leaves ++ -- the state to True. ++ ++ if S.Waiting then ++ S.Waiting := False; ++ S.State := False; ++ ++ Result := pthread_cond_signal (S.CV'Access); ++ pragma Assert (Result = 0); ++ ++ else ++ S.State := True; ++ end if; ++ ++ Result := pthread_mutex_unlock (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ SSL.Abort_Undefer.all; ++ end Set_True; ++ ++ ------------------------ ++ -- Suspend_Until_True -- ++ ------------------------ ++ ++ procedure Suspend_Until_True (S : in out Suspension_Object) is ++ Result : Interfaces.C.int; ++ ++ begin ++ SSL.Abort_Defer.all; ++ ++ Result := pthread_mutex_lock (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ if S.Waiting then ++ ++ -- Program_Error must be raised upon calling Suspend_Until_True ++ -- if another task is already waiting on that suspension object ++ -- (RM D.10(10)). ++ ++ Result := pthread_mutex_unlock (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ SSL.Abort_Undefer.all; ++ ++ raise Program_Error; ++ ++ else ++ -- Suspend the task if the state is False. 
Otherwise, the task ++ -- continues its execution, and the state of the suspension object ++ -- is set to False (ARM D.10 par. 9). ++ ++ if S.State then ++ S.State := False; ++ else ++ S.Waiting := True; ++ ++ loop ++ -- Loop in case pthread_cond_wait returns earlier than expected ++ -- (e.g. in case of EINTR caused by a signal). ++ ++ Result := pthread_cond_wait (S.CV'Access, S.L'Access); ++ pragma Assert (Result = 0 or else Result = EINTR); ++ ++ exit when not S.Waiting; ++ end loop; ++ end if; ++ ++ Result := pthread_mutex_unlock (S.L'Access); ++ pragma Assert (Result = 0); ++ ++ SSL.Abort_Undefer.all; ++ end if; ++ end Suspend_Until_True; ++ ++ ---------------- ++ -- Check_Exit -- ++ ---------------- ++ ++ -- Dummy version ++ ++ function Check_Exit (Self_ID : ST.Task_Id) return Boolean is ++ pragma Unreferenced (Self_ID); ++ begin ++ return True; ++ end Check_Exit; ++ ++ -------------------- ++ -- Check_No_Locks -- ++ -------------------- ++ ++ function Check_No_Locks (Self_ID : ST.Task_Id) return Boolean is ++ pragma Unreferenced (Self_ID); ++ begin ++ return True; ++ end Check_No_Locks; ++ ++ ---------------------- ++ -- Environment_Task -- ++ ---------------------- ++ ++ function Environment_Task return Task_Id is ++ begin ++ return Environment_Task_Id; ++ end Environment_Task; ++ ++ -------------- ++ -- Lock_RTS -- ++ -------------- ++ ++ procedure Lock_RTS is ++ begin ++ Write_Lock (Single_RTS_Lock'Access, Global_Lock => True); ++ end Lock_RTS; ++ ++ ---------------- ++ -- Unlock_RTS -- ++ ---------------- ++ ++ procedure Unlock_RTS is ++ begin ++ Unlock (Single_RTS_Lock'Access, Global_Lock => True); ++ end Unlock_RTS; ++ ++ ------------------ ++ -- Suspend_Task -- ++ ------------------ ++ ++ function Suspend_Task ++ (T : ST.Task_Id; ++ Thread_Self : Thread_Id) return Boolean ++ is ++ pragma Unreferenced (T, Thread_Self); ++ begin ++ return False; ++ end Suspend_Task; ++ ++ ----------------- ++ -- Resume_Task -- ++ ----------------- ++ ++ function 
Resume_Task ++ (T : ST.Task_Id; ++ Thread_Self : Thread_Id) return Boolean ++ is ++ pragma Unreferenced (T, Thread_Self); ++ begin ++ return False; ++ end Resume_Task; ++ ++ -------------------- ++ -- Stop_All_Tasks -- ++ -------------------- ++ ++ procedure Stop_All_Tasks is ++ begin ++ null; ++ end Stop_All_Tasks; ++ ++ --------------- ++ -- Stop_Task -- ++ --------------- ++ ++ function Stop_Task (T : ST.Task_Id) return Boolean is ++ pragma Unreferenced (T); ++ begin ++ return False; ++ end Stop_Task; ++ ++ ------------------- ++ -- Continue_Task -- ++ ------------------- ++ ++ function Continue_Task (T : ST.Task_Id) return Boolean is ++ pragma Unreferenced (T); ++ begin ++ return False; ++ end Continue_Task; ++ ++ ---------------- ++ -- Initialize -- ++ ---------------- ++ ++ procedure Initialize (Environment_Task : Task_Id) is ++ act : aliased struct_sigaction; ++ old_act : aliased struct_sigaction; ++ Tmp_Set : aliased sigset_t; ++ Result : Interfaces.C.int; ++ ++ function State ++ (Int : System.Interrupt_Management.Interrupt_ID) return Character; ++ pragma Import (C, State, "__gnat_get_interrupt_state"); ++ -- Get interrupt state. 
Defined in a-init.c ++ -- The input argument is the interrupt number, ++ -- and the result is one of the following: ++ ++ Default : constant Character := 's'; ++ -- 'n' this interrupt not set by any Interrupt_State pragma ++ -- 'u' Interrupt_State pragma set state to User ++ -- 'r' Interrupt_State pragma set state to Runtime ++ -- 's' Interrupt_State pragma set state to System (use "default" ++ -- system handler) ++ ++ begin ++ Environment_Task_Id := Environment_Task; ++ ++ Interrupt_Management.Initialize; ++ ++ -- Prepare the set of signals that should unblocked in all tasks ++ ++ Result := sigemptyset (Unblocked_Signal_Mask'Access); ++ pragma Assert (Result = 0); ++ ++ for J in Interrupt_Management.Interrupt_ID loop ++ if System.Interrupt_Management.Keep_Unmasked (J) then ++ Result := sigaddset (Unblocked_Signal_Mask'Access, Signal (J)); ++ pragma Assert (Result = 0); ++ end if; ++ end loop; ++ ++ -- Initialize the lock used to synchronize chain of all ATCBs ++ ++ Initialize_Lock (Single_RTS_Lock'Access, RTS_Lock_Level); ++ ++ Specific.Initialize (Environment_Task); ++ ++ if Use_Alternate_Stack then ++ Environment_Task.Common.Task_Alternate_Stack := ++ Alternate_Stack'Address; ++ end if; ++ ++ -- Make environment task known here because it doesn't go through ++ -- Activate_Tasks, which does it for all other tasks. 
++ ++ Known_Tasks (Known_Tasks'First) := Environment_Task; ++ Environment_Task.Known_Tasks_Index := Known_Tasks'First; ++ ++ Enter_Task (Environment_Task); ++ ++ if State ++ (System.Interrupt_Management.Abort_Task_Interrupt) /= Default ++ then ++ act.sa_flags := 0; ++ act.sa_handler := Abort_Handler'Address; ++ ++ Result := sigemptyset (Tmp_Set'Access); ++ pragma Assert (Result = 0); ++ act.sa_mask := Tmp_Set; ++ ++ Result := ++ sigaction ++ (Signal (System.Interrupt_Management.Abort_Task_Interrupt), ++ act'Unchecked_Access, ++ old_act'Unchecked_Access); ++ pragma Assert (Result = 0); ++ Abort_Handler_Installed := True; ++ end if; ++ end Initialize; ++ ++end System.Task_Primitives.Operations; diff -Nru gcc-4.6-4.6.2/debian/patches/ada-symbolic-tracebacks.diff gcc-4.6-4.6.4/debian/patches/ada-symbolic-tracebacks.diff --- gcc-4.6-4.6.2/debian/patches/ada-symbolic-tracebacks.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/ada-symbolic-tracebacks.diff 2013-04-14 23:00:34.000000000 +0000 @@ -34,7 +34,7 @@ ADA_INCLUDES = -I- -I. 
-I$(srcdir)/ada -@@ -2235,7 +2235,7 @@ +@@ -2243,7 +2243,7 @@ a-nucoar.o a-nurear.o i-forbla.o i-forlap.o s-gearop.o GNATRTL_OBJS = $(GNATRTL_NONTASKING_OBJS) $(GNATRTL_TASKING_OBJS) \ @@ -43,7 +43,7 @@ # Default run time files -@@ -2358,7 +2358,6 @@ +@@ -2366,7 +2366,6 @@ for file in $(RTSDIR)/*.ali; do \ $(INSTALL_DATA_DATE) $$file $(DESTDIR)$(ADA_RTL_OBJ_DIR); \ done @@ -51,7 +51,7 @@ -cd $(RTSDIR); for file in *$(arext);do \ $(INSTALL_DATA) $$file $(DESTDIR)$(ADA_RTL_OBJ_DIR); \ $(RANLIB_FOR_TARGET) $(DESTDIR)$(ADA_RTL_OBJ_DIR)/$$file; \ -@@ -2469,7 +2468,7 @@ +@@ -2508,7 +2507,7 @@ $(GNATRTL_OBJS) $(RM) $(RTSDIR)/libgnat$(arext) $(RTSDIR)/libgnarl$(arext) $(AR_FOR_TARGET) $(AR_FLAGS) $(RTSDIR)/libgnat$(arext) \ @@ -60,7 +60,7 @@ $(RANLIB_FOR_TARGET) $(RTSDIR)/libgnat$(arext) $(AR_FOR_TARGET) $(AR_FLAGS) $(RTSDIR)/libgnarl$(arext) \ $(addprefix $(RTSDIR)/,$(GNATRTL_TASKING_OBJS)) -@@ -2499,6 +2498,7 @@ +@@ -2538,6 +2537,7 @@ $(TARGET_LIBGCC2_CFLAGS) \ -o libgnat$(hyphen)$(LIBRARY_VERSION)$(soext) \ $(GNATRTL_NONTASKING_OBJS) $(LIBGNAT_OBJS) \ @@ -68,7 +68,7 @@ $(SO_OPTS)libgnat$(hyphen)$(LIBRARY_VERSION)$(soext) \ $(MISCLIB) -lm cd $(RTSDIR); ../../xgcc -B../../ -shared $(GNATLIBCFLAGS) \ -@@ -2761,6 +2761,7 @@ +@@ -2800,6 +2800,7 @@ sysdep.o : sysdep.c raise-gcc.o : raise-gcc.c raise.h raise.o : raise.c raise.h @@ -325,3 +325,16 @@ use type System.Address; +Index: b/src/gcc/ada/tracebak.c +=================================================================== +--- a/src/gcc/ada/tracebak.c ++++ b/src/gcc/ada/tracebak.c +@@ -320,7 +320,7 @@ + /* Starting with GCC 4.6, -fomit-frame-pointer is turned on by default for + 32-bit x86/Linux as well and DWARF 2 unwind tables are emitted instead. + See the x86-64 case below for the drawbacks with this approach. 
*/ +-#if defined (linux) && (__GNUC__ * 10 + __GNUC_MINOR__ > 45) ++#if (defined (linux) || defined(__GNU__) || defined(__kFreeBSD_kernel__)) && (__GNUC__ * 10 + __GNUC_MINOR__ > 45) + #define USE_GCC_UNWINDER + #else + #define USE_GENERIC_UNWINDER diff -Nru gcc-4.6-4.6.2/debian/patches/alpha-no-ev4-directive.diff gcc-4.6-4.6.4/debian/patches/alpha-no-ev4-directive.diff --- gcc-4.6-4.6.2/debian/patches/alpha-no-ev4-directive.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/alpha-no-ev4-directive.diff 2013-04-14 23:00:34.000000000 +0000 @@ -6,7 +6,7 @@ --- a/src/gcc/config/alpha/alpha.c +++ b/src/gcc/config/alpha/alpha.c -@@ -9740,7 +9740,7 @@ +@@ -9733,7 +9733,7 @@ fputs ("\t.set nomacro\n", asm_out_file); if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX) { @@ -15,7 +15,7 @@ if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX) arch = "ev6"; -@@ -9750,10 +9750,9 @@ +@@ -9743,10 +9743,9 @@ arch = "ev56"; else if (alpha_cpu == PROCESSOR_EV5) arch = "ev5"; diff -Nru gcc-4.6-4.6.2/debian/patches/arm-dynamic-linker-trunk.diff gcc-4.6-4.6.4/debian/patches/arm-dynamic-linker-trunk.diff --- gcc-4.6-4.6.2/debian/patches/arm-dynamic-linker-trunk.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/arm-dynamic-linker-trunk.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -# DP: For ARM hard float, set the dynamic linker to -# DP: /lib/arm-linux-gnueabihf/ld-linux.so.3. 
- ---- a/src/gcc/config/arm/linux-elf.h -+++ b/src/gcc/config/arm/linux-elf.h -@@ -59,14 +59,18 @@ - - #define LIBGCC_SPEC "%{mfloat-abi=soft*:-lfloat} -lgcc" - --#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" -+#define LINUX_DYNAMIC_LINKER_SF "/lib/ld-linux.so.3" -+#define LINUX_DYNAMIC_LINKER_HF "/lib/arm-linux-gnueabihf/ld-linux.so.3" - - #define LINUX_TARGET_LINK_SPEC "%{h*} \ - %{static:-Bstatic} \ - %{shared:-shared} \ - %{symbolic:-Bsymbolic} \ - %{rdynamic:-export-dynamic} \ -- -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ -+ %{msoft-float:-dynamic-linker " LINUX_DYNAMIC_LINKER_SF "} \ -+ %{mfloat-abi=soft*:-dynamic-linker " LINUX_DYNAMIC_LINKER_SF "} \ -+ %{mhard-float:-dynamic-linker " LINUX_DYNAMIC_LINKER_HF "} \ -+ %{mfloat-abi=hard:-dynamic-linker " LINUX_DYNAMIC_LINKER_HF "} \ - -X \ - %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ - SUBTARGET_EXTRA_LINK_SPEC diff -Nru gcc-4.6-4.6.2/debian/patches/arm-dynamic-linker.diff gcc-4.6-4.6.4/debian/patches/arm-dynamic-linker.diff --- gcc-4.6-4.6.2/debian/patches/arm-dynamic-linker.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/arm-dynamic-linker.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,26 +1,56 @@ # DP: For ARM hard float, set the dynamic linker to -# DP: /lib/arm-linux-gnueabihf/ld-linux.so.3. +# DP: /lib/ld-linux-armhf.so.3. ---- a/src/gcc/config/arm/linux-elf.h -+++ b/src/gcc/config/arm/linux-elf.h -@@ -62,14 +62,18 @@ +2012-05-01 Richard Earnshaw + + * arm/linux-eabi.h (GLIBC_DYNAMIC_LINKER_DEFAULT): Avoid ifdef + comparing enumeration values. Update comments. + +2012-04-26 Michael Hope + Richard Earnshaw + + * config/arm/linux-eabi.h (GLIBC_DYNAMIC_LINKER_SOFT_FLOAT): Define. + (GLIBC_DYNAMIC_LINKER_HARD_FLOAT): Define. + (GLIBC_DYNAMIC_LINKER_DEFAULT): Define. + (GLIBC_DYNAMIC_LINKER): Redefine to use the hard float path. 
+ +--- a/src/gcc/config/arm/linux-eabi.h ++++ b/src/gcc/config/arm/linux-eabi.h +@@ -32,7 +32,8 @@ + while (false) + + /* We default to a soft-float ABI so that binaries can run on all +- target hardware. */ ++ target hardware. If you override this to use the hard-float ABI then ++ change the setting of GLIBC_DYNAMIC_LINKER_DEFAULT as well. */ + #undef TARGET_DEFAULT_FLOAT_ABI + #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT - #define LIBGCC_SPEC "%{msoft-float:-lfloat} %{mfloat-abi=soft*:-lfloat} -lgcc" +@@ -59,11 +60,24 @@ + #undef SUBTARGET_EXTRA_LINK_SPEC + #define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION --#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" -+#define LINUX_DYNAMIC_LINKER_SF "/lib/ld-linux.so.3" -+#define LINUX_DYNAMIC_LINKER_HF "/lib/arm-linux-gnueabihf/ld-linux.so.3" +-/* Use ld-linux.so.3 so that it will be possible to run "classic" +- GNU/Linux binaries on an EABI system. */ ++/* GNU/Linux on ARM currently supports three dynamic linkers: ++ - ld-linux.so.2 - for the legacy ABI ++ - ld-linux.so.3 - for the EABI-derived soft-float ABI ++ - ld-linux-armhf.so.3 - for the EABI-derived hard-float ABI. ++ All the dynamic linkers live in /lib. ++ We default to soft-float, but this can be overridden by changing both ++ GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI. 
*/ ++ + #undef GLIBC_DYNAMIC_LINKER +-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.3" ++#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3" ++#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3" ++#define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT - #define LINUX_TARGET_LINK_SPEC "%{h*} \ - %{static:-Bstatic} \ - %{shared:-shared} \ - %{symbolic:-Bsymbolic} \ - %{rdynamic:-export-dynamic} \ -- -dynamic-linker " LINUX_DYNAMIC_LINKER " \ -+ %{msoft-float:-dynamic-linker " LINUX_DYNAMIC_LINKER_SF "} \ -+ %{mfloat-abi=soft*:-dynamic-linker " LINUX_DYNAMIC_LINKER_SF "} \ -+ %{mhard-float:-dynamic-linker " LINUX_DYNAMIC_LINKER_HF "} \ -+ %{mfloat-abi=hard:-dynamic-linker " LINUX_DYNAMIC_LINKER_HF "} \ - -X \ - --hash-style=both \ - %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ ++#define GLIBC_DYNAMIC_LINKER \ ++ "%{mfloat-abi=hard:" GLIBC_DYNAMIC_LINKER_HARD_FLOAT "} \ ++ %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ ++ %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" ++ + /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to + use the GNU/Linux version, not the generic BPABI version. 
*/ + #undef LINK_SPEC diff -Nru gcc-4.6-4.6.2/debian/patches/arm-multilib-defaults.diff gcc-4.6-4.6.4/debian/patches/arm-multilib-defaults.diff --- gcc-4.6-4.6.2/debian/patches/arm-multilib-defaults.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/arm-multilib-defaults.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,103 @@ +# DP: Set MULTILIB_DEFAULTS for ARM multilib builds + +--- a/src/gcc/config.gcc ++++ a/src/gcc/config.gcc +@@ -3013,10 +3013,18 @@ + esac + + case "$with_float" in +- "" \ +- | soft | hard | softfp) ++ "") + # OK + ;; ++ soft) ++ tm_defines="${tm_defines} TARGET_CONFIGURED_FLOAT_ABI=0" ++ ;; ++ softfp) ++ tm_defines="${tm_defines} TARGET_CONFIGURED_FLOAT_ABI=1" ++ ;; ++ hard) ++ tm_defines="${tm_defines} TARGET_CONFIGURED_FLOAT_ABI=2" ++ ;; + *) + echo "Unknown floating point type used in --with-float=$with_float" 1>&2 + exit 1 +@@ -3060,6 +3068,9 @@ + "" \ + | arm | thumb ) + #OK ++ if test "$with_mode" = thumb; then ++ tm_defines="${tm_defines} TARGET_CONFIGURED_THUMB_MODE=1" ++ fi + ;; + *) + echo "Unknown mode used in --with-mode=$with_mode" +--- a/src/gcc/config/arm/linux-eabi.h ++++ b/src/gcc/config/arm/linux-eabi.h +@@ -35,7 +35,21 @@ + target hardware. If you override this to use the hard-float ABI then + change the setting of GLIBC_DYNAMIC_LINKER_DEFAULT as well. 
*/ + #undef TARGET_DEFAULT_FLOAT_ABI ++#ifdef TARGET_CONFIGURED_FLOAT_ABI ++#if TARGET_CONFIGURED_FLOAT_ABI == 2 ++#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD ++#define MULTILIB_DEFAULT_FLOAT_ABI "mfloat-abi=hard" ++#elif TARGET_CONFIGURED_FLOAT_ABI == 1 ++#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFTFP ++#define MULTILIB_DEFAULT_FLOAT_ABI "mfloat-abi=softfp" ++#else + #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT ++#define MULTILIB_DEFAULT_FLOAT_ABI "mfloat-abi=soft" ++#endif ++#else ++#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT ++#define MULTILIB_DEFAULT_FLOAT_ABI "mfloat-abi=soft" ++#endif + + /* We default to the "aapcs-linux" ABI so that enums are int-sized by + default. */ +@@ -71,13 +85,43 @@ + #undef GLIBC_DYNAMIC_LINKER + #define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3" + #define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3" ++#ifdef TARGET_CONFIGURED_FLOAT_ABI ++#if TARGET_CONFIGURED_FLOAT_ABI == 2 ++#define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_HARD_FLOAT ++#else + #define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT ++#endif ++#else ++#define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT ++#endif + + #define GLIBC_DYNAMIC_LINKER \ + "%{mfloat-abi=hard:" GLIBC_DYNAMIC_LINKER_HARD_FLOAT "} \ + %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ + %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" + ++/* Set the multilib defaults according the configuration, needed to ++ let gcc -print-multi-dir do the right thing. 
*/ ++ ++#if TARGET_BIG_ENDIAN_DEFAULT ++#define MULTILIB_DEFAULT_ENDIAN "mbig-endian" ++#else ++#define MULTILIB_DEFAULT_ENDIAN "mlittle-endian" ++#endif ++ ++#ifndef TARGET_CONFIGURED_THUMB_MODE ++#define MULTILIB_DEFAULT_MODE "marm" ++#elif TARGET_CONFIGURED_THUMB_MODE == 1 ++#define MULTILIB_DEFAULT_MODE "mthumb" ++#else ++#define MULTILIB_DEFAULT_MODE "marm" ++#endif ++ ++#undef MULTILIB_DEFAULTS ++#define MULTILIB_DEFAULTS \ ++ { MULTILIB_DEFAULT_MODE, MULTILIB_DEFAULT_ENDIAN, \ ++ MULTILIB_DEFAULT_FLOAT_ABI, "mno-thumb-interwork" } ++ + /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to + use the GNU/Linux version, not the generic BPABI version. */ + #undef LINK_SPEC diff -Nru gcc-4.6-4.6.2/debian/patches/arm-multilib-soft.diff gcc-4.6-4.6.4/debian/patches/arm-multilib-soft.diff --- gcc-4.6-4.6.2/debian/patches/arm-multilib-soft.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/arm-multilib-soft.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,27 @@ +# DP: ARM hard/soft float multilib support + +Index: b/src/gcc/config/arm/t-linux-eabi +=================================================================== +--- a/src/gcc/config/arm/t-linux-eabi 2011-01-03 20:52:22.000000000 +0000 ++++ b/src/gcc/config/arm/t-linux-eabi 2011-08-21 21:08:47.583351817 +0000 +@@ -24,6 +24,20 @@ + MULTILIB_OPTIONS = + MULTILIB_DIRNAMES = + ++ifeq ($(with_float),hard) ++MULTILIB_OPTIONS = mfloat-abi=soft/mfloat-abi=hard ++MULTILIB_DIRNAMES = sf hf ++MULTILIB_EXCEPTIONS = ++MULTILIB_MATCHES = mfloat-abi?hard=mhard-float mfloat-abi?soft=msoft-float mfloat-abi?soft=mfloat-abi?softfp ++MULTILIB_OSDIRNAMES = arm-linux-gnueabi:arm-linux-gnueabi ../lib:arm-linux-gnueabihf ++else ++MULTILIB_OPTIONS = mfloat-abi=soft/mfloat-abi=hard ++MULTILIB_DIRNAMES = sf hf ++MULTILIB_EXCEPTIONS = ++MULTILIB_MATCHES = mfloat-abi?hard=mhard-float mfloat-abi?soft=msoft-float mfloat-abi?soft=mfloat-abi?softfp ++MULTILIB_OSDIRNAMES = ../lib:arm-linux-gnueabi 
arm-linux-gnueabihf:arm-linux-gnueabihf ++endif ++ + #MULTILIB_OPTIONS += mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te + #MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te + #MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* diff -Nru gcc-4.6-4.6.2/debian/patches/arm-multilib-softfp.diff gcc-4.6-4.6.4/debian/patches/arm-multilib-softfp.diff --- gcc-4.6-4.6.2/debian/patches/arm-multilib-softfp.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/arm-multilib-softfp.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,27 @@ +# DP: ARM hard/softfp float multilib support + +Index: b/src/gcc/config/arm/t-linux-eabi +=================================================================== +--- a/src/gcc/config/arm/t-linux-eabi 2011-01-03 20:52:22.000000000 +0000 ++++ b/src/gcc/config/arm/t-linux-eabi 2011-08-21 21:08:47.583351817 +0000 +@@ -24,6 +24,20 @@ + MULTILIB_OPTIONS = + MULTILIB_DIRNAMES = + ++ifeq ($(with_float),hard) ++MULTILIB_OPTIONS = mfloat-abi=softfp/mfloat-abi=hard ++MULTILIB_DIRNAMES = sf hf ++MULTILIB_EXCEPTIONS = ++MULTILIB_MATCHES = mfloat-abi?hard=mhard-float mfloat-abi?softfp=msoft-float mfloat-abi?softfp=mfloat-abi?soft ++MULTILIB_OSDIRNAMES = arm-linux-gnueabi:arm-linux-gnueabi ../lib:arm-linux-gnueabihf ++else ++MULTILIB_OPTIONS = mfloat-abi=softfp/mfloat-abi=hard ++MULTILIB_DIRNAMES = sf hf ++MULTILIB_EXCEPTIONS = ++MULTILIB_MATCHES = mfloat-abi?hard=mhard-float mfloat-abi?softfp=msoft-float mfloat-abi?softfp=mfloat-abi?soft ++MULTILIB_OSDIRNAMES = ../lib:arm-linux-gnueabi arm-linux-gnueabihf:arm-linux-gnueabihf ++endif ++ + #MULTILIB_OPTIONS += mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te + #MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te + #MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* diff -Nru gcc-4.6-4.6.2/debian/patches/arm-multilib.diff gcc-4.6-4.6.4/debian/patches/arm-multilib.diff 
--- gcc-4.6-4.6.2/debian/patches/arm-multilib.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/arm-multilib.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,9 +1,7 @@ # DP: ARM hard/soft float multilib -Index: gcc-4.6-4.6.1/src/gcc/config/arm/t-linux-eabi -=================================================================== ---- gcc-4.6-4.6.1.orig/src/gcc/config/arm/t-linux-eabi 2011-01-03 20:52:22.000000000 +0000 -+++ gcc-4.6-4.6.1/src/gcc/config/arm/t-linux-eabi 2011-08-21 21:08:47.583351817 +0000 +--- a/src/gcc/config/arm/t-linux-eabi ++++ b/src/gcc/config/arm/t-linux-eabi @@ -24,6 +24,20 @@ MULTILIB_OPTIONS = MULTILIB_DIRNAMES = diff -Nru gcc-4.6-4.6.2/debian/patches/arm-no-va_list-warn.diff gcc-4.6-4.6.4/debian/patches/arm-no-va_list-warn.diff --- gcc-4.6-4.6.2/debian/patches/arm-no-va_list-warn.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/arm-no-va_list-warn.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,31 @@ +# DP: Don't warn anymore that 4.4 has changed the `va_list' mangling. + +gcc/ + +2012-09-21 Matthias Klose + + * config/arm/arm.c (arm_mangle_type): Don't warn anymore that + 4.4 has changed the `va_list' mangling. + +Index: gcc/config/arm/arm.c +=================================================================== +--- a/src/gcc/config/arm/arm.c (revision 191609) ++++ b/src/gcc/config/arm/arm.c (revision 191610) +@@ -25072,16 +25072,7 @@ + has to be managled as if it is in the "std" namespace. */ + if (TARGET_AAPCS_BASED + && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) +- { +- static bool warned; +- if (!warned && warn_psabi && !in_system_header) +- { +- warned = true; +- inform (input_location, +- "the mangling of % has changed in GCC 4.4"); +- } +- return "St9__va_list"; +- } ++ return "St9__va_list"; + + /* Half-precision float. 
*/ + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) diff -Nru gcc-4.6-4.6.2/debian/patches/armhf-triplet.diff gcc-4.6-4.6.4/debian/patches/armhf-triplet.diff --- gcc-4.6-4.6.2/debian/patches/armhf-triplet.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/armhf-triplet.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,8 +1,8 @@ # DP: add support for arm-linux-*eabi* triplets; useful for armhf ---- a/src/libjava/configure.ac.orig +--- a/src/libjava/configure.ac +++ b/src/libjava/configure.ac -@@ -924,7 +924,7 @@ +@@ -926,7 +926,7 @@ # on Darwin -single_module speeds up loading of the dynamic libraries. extra_ldflags_libjava=-Wl,-single_module ;; @@ -11,9 +11,9 @@ # Some of the ARM unwinder code is actually in libstdc++. We # could in principle replicate it in libgcj, but it's better to # have a dependency on libstdc++. ---- a/src/gcc/config.gcc.orig +--- a/src/gcc/config.gcc +++ b/src/gcc/config.gcc -@@ -822,7 +822,7 @@ +@@ -826,7 +826,7 @@ esac tmake_file="${tmake_file} t-linux arm/t-arm" case ${target} in @@ -22,7 +22,7 @@ tm_file="$tm_file arm/bpabi.h arm/linux-eabi.h" tm_file="$tm_file ../../libgcc/config/arm/bpabi-lib.h" tmake_file="$tmake_file arm/t-arm-elf arm/t-bpabi arm/t-linux-eabi t-slibgcc-libgcc" -@@ -850,7 +850,7 @@ +@@ -854,7 +854,7 @@ tm_file="dbxelf.h elfos.h arm/unknown-elf.h arm/elf.h arm/linux-gas.h arm/uclinux-elf.h glibc-stdint.h" tmake_file="arm/t-arm arm/t-arm-elf" case ${target} in @@ -31,9 +31,9 @@ tm_file="$tm_file arm/bpabi.h arm/uclinux-eabi.h" tm_file="$tm_file ../../libgcc/config/arm/bpabi-lib.h" tmake_file="$tmake_file arm/t-bpabi" ---- a/src/gcc/testsuite/lib/target-supports.exp.orig +--- a/src/gcc/testsuite/lib/target-supports.exp +++ b/src/gcc/testsuite/lib/target-supports.exp -@@ -3235,7 +3235,7 @@ +@@ -3265,7 +3265,7 @@ || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget alpha*-*-*] @@ -42,7 +42,7 @@ || [istarget bfin*-*linux*] || [istarget hppa*-*linux*] || [istarget s390*-*-*] -@@ 
-3266,7 +3266,7 @@ +@@ -3296,7 +3296,7 @@ || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget alpha*-*-*] @@ -51,9 +51,9 @@ || [istarget hppa*-*linux*] || [istarget s390*-*-*] || [istarget powerpc*-*-*] ---- a/src/gcc/ada/gcc-interface/Makefile.in.orig +--- a/src/gcc/ada/gcc-interface/Makefile.in +++ b/src/gcc/ada/gcc-interface/Makefile.in -@@ -1846,7 +1846,7 @@ +@@ -1841,7 +1841,7 @@ LIBRARY_VERSION := $(LIB_VERSION) endif @@ -62,9 +62,9 @@ LIBGNAT_TARGET_PAIRS = \ a-intnam.ads // ---- a/src/libstdc++-v3/testsuite/20_util/make_signed/requirements/typedefs-2.cc.orig +--- a/src/libstdc++-v3/testsuite/20_util/make_signed/requirements/typedefs-2.cc +++ b/src/libstdc++-v3/testsuite/20_util/make_signed/requirements/typedefs-2.cc @@ -1,5 +1,5 @@ // { dg-options "-std=gnu++0x -funsigned-char -fshort-enums" } diff -Nru gcc-4.6-4.6.2/debian/patches/config-ml.diff gcc-4.6-4.6.4/debian/patches/config-ml.diff --- gcc-4.6-4.6.2/debian/patches/config-ml.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/config-ml.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,35 +1,11 @@ -# DP: disable some biarch libraries for biarch builds - ---- - config-ml.in | 45 ++++++++++++++++++++++++++++++++++++++++++++- - 1 files changed, 44 insertions(+), 1 deletions(-) +# DP: - Disable some biarch libraries for biarch builds. +# DP: - Fix multilib builds on kernels which don't support all multilibs. 
+Index: b/src/config-ml.in +=================================================================== --- a/src/config-ml.in +++ b/src/config-ml.in -@@ -306,6 +306,11 @@ - done - fi - ;; -+i[34567]86-*-*) -+ case " $multidirs " in -+ *" 64 "*) ac_configure_args="${ac_configure_args} --host=x86_64-linux-gnu" -+ esac -+ ;; - m68*-*-*) - if [ x$enable_softfloat = xno ] - then -@@ -477,9 +482,36 @@ - esac - done - fi -+ case " $multidirs " in -+ *" 64 "*) ac_configure_args="${ac_configure_args} --host=powerpc64-linux-gnu" -+ esac -+ ;; -+s390-*-*) -+ case " $multidirs " in -+ *" 64 "*) ac_configure_args="${ac_configure_args} --host=s390x-linux-gnu" -+ esac +@@ -467,6 +467,25 @@ ;; esac @@ -43,7 +19,7 @@ +multidirs="" +for x in ${old_multidirs}; do + case " $x " in -+ " 32 "|" n32 "|" 64 "|" hf "|" sf ") ++ " 32 "|" n32 "|" x32 "|" 64 "|" hf "|" sf ") + case "$biarch_multidir_names" in + *"$ml_srcbase"*) multidirs="${multidirs} ${x}" ;; + esac @@ -55,7 +31,43 @@ # Remove extraneous blanks from multidirs. # Tests like `if [ -n "$multidirs" ]' require it. multidirs=`echo "$multidirs" | sed -e 's/^[ ][ ]*//' -e 's/[ ][ ]*$//' -e 's/[ ][ ]*/ /g'` -@@ -871,9 +903,20 @@ +@@ -654,6 +673,35 @@ + + for ml_dir in ${multidirs}; do + ++ # a native build fails if the running kernel doesn't support the multilib ++ # variant; force cross compilation for these cases. 
++ ml_host_arg= ++ case "${host}" in ++ i[34567]86-*-linux*) ++ case "${ml_dir}" in ++ 64) ml_host_arg="--host=x86_64-linux-gnu";; ++ x32) ml_host_arg="--host=x86_64-linux-gnux32";; ++ esac ++ ;; ++ powerpc-*-linux*) ++ case "${ml_dir}" in ++ 64) ml_host_arg="--host=powerpc64-linux-gnu" ++ esac ++ ;; ++ s390-*-linux*) ++ case "${ml_dir}" in ++ 64) ml_host_arg="--host=s390x-linux-gnu" ++ esac ++ ;; ++ x86_64-*-linux*) ++ case "${ml_dir}" in ++ x32) ml_host_arg="--host=x86_64-linux-gnux32" ++ esac ++ esac ++ if [ -n "${ml_host_arg}" ]; then ++ ml_host_arg="${ml_host_arg} --with-default-host-alias=${host_alias}" ++ fi ++ + if [ "${ml_verbose}" = --verbose ]; then + echo "Running configure in multilib subdir ${ml_dir}" + echo "pwd: `${PWDCMD-pwd}`" +@@ -858,9 +906,20 @@ fi fi @@ -73,7 +85,76 @@ if eval ${ml_config_env} ${ml_config_shell} ${ml_recprog} \ --with-multisubdir=${ml_dir} --with-multisrctop=${multisrctop} \ - ${ac_configure_args} ${ml_config_env} ${ml_srcdiroption} ; then -+ ${ac_configure_args} ${ml_configure_args} ${ml_srcdiroption} ; then ++ ${ac_configure_args} ${ml_configure_args} ${ml_host_arg} ${ml_srcdiroption} ; then true else exit 1 +Index: b/src/libstdc++-v3/include/Makefile.am +=================================================================== +--- a/src/libstdc++-v3/include/Makefile.am ++++ b/src/libstdc++-v3/include/Makefile.am +@@ -829,8 +829,9 @@ + endif + + host_srcdir = ${glibcxx_srcdir}/$(OS_INC_SRCDIR) +-host_builddir = ./${host_alias}/bits +-host_installdir = ${gxx_include_dir}/${host_alias}$(MULTISUBDIR)/bits ++default_host_alias = @default_host_alias@ ++host_builddir = ./${default_host_alias}/bits ++host_installdir = ${gxx_include_dir}/${default_host_alias}$(MULTISUBDIR)/bits + host_headers = \ + ${host_srcdir}/ctype_base.h \ + ${host_srcdir}/ctype_inline.h \ +@@ -1050,6 +1051,7 @@ + + stamp-${host_alias}: + @-mkdir -p ${host_builddir} ++ @test ${default_host_alias} = ${host_alias} || ln -sf ${default_host_alias} ${host_alias} + 
@$(STAMP) stamp-${host_alias} + + # Host includes static. +Index: b/src/libstdc++-v3/include/Makefile.in +=================================================================== +--- a/src/libstdc++-v3/include/Makefile.in ++++ b/src/libstdc++-v3/include/Makefile.in +@@ -1082,8 +1082,9 @@ + # For --enable-cheaders=c_std + @GLIBCXX_C_HEADERS_COMPATIBILITY_TRUE@c_compatibility_headers_extra = ${c_compatibility_headers} + host_srcdir = ${glibcxx_srcdir}/$(OS_INC_SRCDIR) +-host_builddir = ./${host_alias}/bits +-host_installdir = ${gxx_include_dir}/${host_alias}$(MULTISUBDIR)/bits ++default_host_alias = @default_host_alias@ ++host_builddir = ./${default_host_alias}/bits ++host_installdir = ${gxx_include_dir}/${default_host_alias}$(MULTISUBDIR)/bits + host_headers = \ + ${host_srcdir}/ctype_base.h \ + ${host_srcdir}/ctype_inline.h \ +@@ -1461,6 +1462,7 @@ + + stamp-${host_alias}: + @-mkdir -p ${host_builddir} ++ @test ${default_host_alias} = ${host_alias} || ln -sf ${default_host_alias} ${host_alias} + @$(STAMP) stamp-${host_alias} + + # Host includes static. +Index: b/src/libstdc++-v3/configure.ac +=================================================================== +--- a/src/libstdc++-v3/configure.ac ++++ b/src/libstdc++-v3/configure.ac +@@ -458,6 +458,16 @@ + multilib_arg= + fi + ++AC_ARG_WITH(default-host-alias, ++[AS_HELP_STRING([--with-default-host-alias=TRIPLET], ++ [specifies host triplet used for the default multilib build])], ++[case "${withval}" in ++yes) AC_MSG_ERROR(bad value ${withval} given for default host triplet) ;; ++no) default_host_alias='${host_alias}' ;; ++*) default_host_alias=${withval} ;; ++esac],[default_host_alias='${host_alias}']) ++AC_SUBST(default_host_alias) ++ + # Export all the install information. 
+ GLIBCXX_EXPORT_INSTALL_INFO + diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-arm-abi-conformance.diff gcc-4.6-4.6.4/debian/patches/gcc-arm-abi-conformance.diff --- gcc-4.6-4.6.2/debian/patches/gcc-arm-abi-conformance.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-arm-abi-conformance.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,591 @@ +# DP: Fix ARM ABI conformance regression. + +gcc/ + +2012-02-28 Richard Earnshaw + + * arm.c (aapcs_vfp_is_call_or_return_candidate): Only use the machine + mode if there is no type information available. + +gcc/testsuite/ + +2012-02-28 Ramana Radhakrishnan + + * gcc.target/arm/aapcs/vfp1.c (dg_do run): Run on all eabi variants. + * gcc.target/arm/aapcs/vfp2.c: Likewise. + * gcc.target/arm/aapcs/vfp3.c: Likewise. + * gcc.target/arm/aapcs/vfp4.c: Likewise. + * gcc.target/arm/aapcs/vfp5.c: Likewise. + * gcc.target/arm/aapcs/vfp6.c: Likewise. + * gcc.target/arm/aapcs/vfp7.c: Likewise. + * gcc.target/arm/aapcs/vfp8.c: Likewise. + * gcc.target/arm/aapcs/vfp9.c: Likewise. + * gcc.target/arm/aapcs/vfp10.c: Likewise. + * gcc.target/arm/aapcs/vfp11.c: Likewise. + * gcc.target/arm/aapcs/vfp12.c: Likewise. + * gcc.target/arm/aapcs/vfp13.c: Likewise. + * gcc.target/arm/aapcs/vfp14.c: Likewise. + * gcc.target/arm/aapcs/vfp15.c: Likewise. + * gcc.target/arm/aapcs/vfp16.c: Likewise. + * gcc.target/arm/aapcs/vfp17.c: Likewise. + * gcc.target/arm/neon-constants.h: New file. + * gcc.target/arm/aapcs/neon-constants.h: New file. + * gcc.target/arm/aapcs/neon-vect1.c: New test. + * gcc.target/arm/aapcs/neon-vect2.c: New test. + * gcc.target/arm/aapcs/neon-vect3.c: New test. + * gcc.target/arm/aapcs/neon-vect4.c: New test. + * gcc.target/arm/aapcs/neon-vect5.c: New test. + * gcc.target/arm/aapcs/neon-vect6.c: New test. + * gcc.target/arm/aapcs/neon-vect7.c: New test. + * gcc.target/arm/aapcs/neon-vect8.c: New test. 
+ + +--- a/src/gcc/config/arm/arm.c 2012-03-06 13:24:25 +0000 ++++ b/src/gcc/config/arm/arm.c 2012-03-08 15:46:42 +0000 +@@ -4331,6 +4331,11 @@ + (TARGET_VFP_DOUBLE || !is_double)); + } + ++/* Return true if an argument whose type is TYPE, or mode is MODE, is ++ suitable for passing or returning in VFP registers for the PCS ++ variant selected. If it is, then *BASE_MODE is updated to contain ++ a machine mode describing each element of the argument's type and ++ *COUNT to hold the number of such elements. */ + static bool + aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type, +@@ -4338,9 +4343,20 @@ + { + enum machine_mode new_mode = VOIDmode; + +- if (GET_MODE_CLASS (mode) == MODE_FLOAT +- || GET_MODE_CLASS (mode) == MODE_VECTOR_INT +- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) ++ /* If we have the type information, prefer that to working things ++ out from the mode. */ ++ if (type) ++ { ++ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); ++ ++ if (ag_count > 0 && ag_count <= 4) ++ *count = ag_count; ++ else ++ return false; ++ } ++ else if (GET_MODE_CLASS (mode) == MODE_FLOAT ++ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT ++ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + *count = 1; + new_mode = mode; +@@ -4350,15 +4366,6 @@ + *count = 2; + new_mode = (mode == DCmode ? 
DFmode : SFmode); + } +- else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE)) +- { +- int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); +- +- if (ag_count > 0 && ag_count <= 4) +- *count = ag_count; +- else +- return false; +- } + else + return false; + +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/abitest.h 2009-08-06 17:15:19 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/abitest.h 2012-03-01 09:33:24 +0000 +@@ -1,3 +1,4 @@ ++ + #define IN_FRAMEWORK + + #ifdef VFP +@@ -10,6 +11,13 @@ + #define D6 48 + #define D7 56 + ++#ifdef NEON ++#define Q0 D0 ++#define Q1 D2 ++#define Q2 D4 ++#define Q3 D6 ++#endif ++ + #define S0 64 + #define S1 68 + #define S2 72 +@@ -27,24 +35,19 @@ + #define S14 120 + #define S15 124 + +-#define R0 128 +-#define R1 132 +-#define R2 136 +-#define R3 140 +- +-#define STACK 144 +- ++#define CORE_REG_START 128 + #else +- +-#define R0 0 +-#define R1 4 +-#define R2 8 +-#define R3 12 +- +-#define STACK 16 +- ++#define CORE_REG_START 0 + #endif + ++#define R0 CORE_REG_START ++#define R1 (R0 + 4) ++#define R2 (R1 + 4) ++#define R3 (R2 + 4) ++#define STACK (R3 + 4) ++ ++ ++ + extern void abort (void); + + __attribute__((naked)) void dumpregs () __asm("myfunc"); +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-constants.h 1970-01-01 00:00:00 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-constants.h 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,33 @@ ++ ++ ++#include "arm_neon.h" ++ ++const int32x4_t i32x4_constvec1 = { 1101, 1102, 1103, 1104}; ++const int32x4_t i32x4_constvec2 = { 2101, 2102, 2103, 2104}; ++ ++#define ELEM(INDEX) .val[INDEX] ++ ++const int32x4x2_t i32x4x2_constvec1 = {ELEM(0) = {0xaddebccb,11,12,13}, ++ ELEM(1) = {14, 15, 16, 17} }; ++ ++const int32x4x2_t i32x4x2_constvec2 = { ELEM(0) = {0xaadebcca,11,12,13}, ++ ELEM(1) = {140, 15, 16, 17}}; ++ ++const int32x4x3_t i32x4x3_constvec1 = { ELEM(0) = {0xabbccdde,8, 9, 10}, ++ ELEM(1) = {0xabcccdde, 26, 27, 28}, ++ ELEM(2) = {0xaccccddf, 29, 30, 31}}; 
++ ++const int32x4x3_t i32x4x3_constvec2 = { ELEM(0) = {0xbccccdd0,8, 9, 10}, ++ ELEM(1) = {0xbdfe1000, 26, 27, 28}, ++ ELEM(2) = {0xaccccddf, 29, 30, 31}}; ++const float32x4x2_t f32x4x2_constvec1 = ++ { ELEM(0) = { 7.101f, 0.201f, 0.301f, 0.401f} , ++ ELEM(1) = { 8.101f, 0.501f, 0.601f, 0.701f} }; ++ ++const float32x4x2_t f32x4x2_constvec2 = ++ { ELEM(0) = { 11.99f , 11.21f, 1.27f, 8.74f}, ++ ELEM(1) = { 13.45f , 1.23f ,1.24f, 1.26f}}; ++ ++const int32x2_t i32x2_constvec1 = { 1283, 1345 }; ++const int32x2x2_t i32x2x2_constvec1 = { ELEM(0) = { 0xabcdefab, 32 }, ++ ELEM(1) = { 0xabcdefbc, 33 }}; +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect1.c 1970-01-01 00:00:00 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect1.c 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,27 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect1.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(float, 3.0f, S4) /* D2, Q1 */ ++ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */ ++ARG(double, 12.0, D3) /* Backfill this particular argument. */ ++ARG(int32x4x2_t, i32x4x2_constvec2, STACK) ++ARG(float, 5.0f, STACK+sizeof(int32x4x2_t)) /* No backfill allowed. 
*/ ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect2.c 1970-01-01 00:00:00 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect2.c 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,23 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect2.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1. */ ++ARG(float, 3.0f, S4) /* D2, Q1 occupied. */ ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect3.c 1970-01-01 00:00:00 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect3.c 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,26 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect3.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(float, 3.0f, S4) /* D2, Q1 */ ++ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */ ++ARG(int32x4x2_t, i32x4x2_constvec2, STACK) ++ARG(double, 11.0, STACK+sizeof(int32x4x2_t)) /* No backfill in D3. 
*/ ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect4.c 1970-01-01 00:00:00 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect4.c 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,27 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect4.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(float, 3.0f, S4) /* D2, Q1 */ ++ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */ ++ARG(double, 12.0, D3) /* Backfill this particular argument. */ ++ARG(float, 5.0f, S5) /* Backfill in S5. */ ++ARG(int32x4x2_t, i32x4x2_constvec2, STACK) ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect5.c 1970-01-01 00:00:00 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect5.c 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,28 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect5.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(float, 3.0f, S4) /* D2, Q1 */ ++ARG(float32x4x2_t, f32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */ ++ARG(double, 12.0, D3) /* Backfill this particular argument. 
*/ ++ARG(int32x4x2_t, i32x4x2_constvec2, STACK) ++ARG(float, 5.0f, STACK+sizeof(int32x4x2_t)) /* No backfill allowed. */ ++LAST_ARG(int, 3, R0) ++ ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect6.c 1970-01-01 00:00:00 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect6.c 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,24 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect6.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(int32x4x3_t, i32x4x3_constvec1, Q1) /* Q1, Q2, Q3 */ ++ARG(int32x4x3_t, i32x4x3_constvec2, STACK) ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect7.c 1970-01-01 00:00:00 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect7.c 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,27 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect7.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(float, 24.3f, S0) /* S0 , D0, Q0 */ ++ARG(int32x4x3_t, i32x4x3_constvec1, Q1) /* Q1, Q2, Q3 */ ++ARG(double, 25.6, D1) ++ARG(float, 12.67f, S1) ++ARG(int32x4x3_t, i32x4x3_constvec2, STACK) ++ARG(double, 2.47, STACK+sizeof(int32x4x3_t)) ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect8.c 1970-01-01 00:00:00 +0000 ++++ 
b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect8.c 2012-03-01 09:33:24 +0000 +@@ -0,0 +1,27 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect8.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(float, 24.3f, S0) /* S0 , D0, Q0 */ ++ARG(int32x2_t, i32x2_constvec1, D1) /* D1 */ ++ARG(double, 25.6, D2) ++ARG(float, 12.67f, S1) ++ARG(int32x4x3_t, i32x4x3_constvec2, STACK) ++ARG(double, 2.47, STACK+sizeof(int32x4x3_t)) ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp1.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp1.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp10.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp10.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp11.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp11.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target 
arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp12.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp12.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp13.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp13.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp14.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp14.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp15.c 2009-08-06 17:15:19 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp15.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ 
+ /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp16.c 2009-08-06 17:15:19 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp16.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp17.c 2009-08-06 17:15:19 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp17.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp2.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp2.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp3.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp3.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- 
a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp4.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp4.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp5.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp5.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp6.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp6.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp7.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp7.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp8.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp8.c 
2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp9.c 2009-08-06 13:27:45 +0000 ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp9.c 2012-03-01 09:33:24 +0000 +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-base-version.diff gcc-4.6-4.6.4/debian/patches/gcc-base-version.diff --- gcc-4.6-4.6.2/debian/patches/gcc-base-version.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-base-version.diff 2013-04-14 23:00:34.000000000 +0000 @@ -3,15 +3,15 @@ --- a/src/gcc/BASE-VER +++ b/src/gcc/BASE-VER @@ -1 +1 @@ --4.6.2 +-4.6.4 +4.6 ---- a/src/gcc/FULL-VER +--- /dev/null +++ b/src/gcc/FULL-VER @@ -0,0 +1 @@ -+4.6.2 ++4.6.4 --- a/src/gcc/Makefile.in +++ b/src/gcc/Makefile.in -@@ -834,11 +834,13 @@ +@@ -832,11 +832,13 @@ TM_H = $(GTM_H) insn-flags.h $(OPTIONS_H) # Variables for version information. @@ -26,7 +26,7 @@ BASEVER_c := $(shell cat $(BASEVER)) DEVPHASE_c := $(shell cat $(DEVPHASE)) DATESTAMP_c := $(shell cat $(DATESTAMP)) -@@ -857,7 +859,7 @@ +@@ -855,7 +857,7 @@ # development phase collapsed to the empty string in release mode # (i.e. if DEVPHASE_c is empty). The space immediately after the # comma in the $(if ...) constructs is significant - do not remove it. 
@@ -35,7 +35,7 @@ DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\"" DATESTAMP_s := "\"$(if $(DEVPHASE_c), $(DATESTAMP_c))\"" PKGVERSION_s:= "\"@PKGVERSION@\"" -@@ -2187,9 +2189,9 @@ +@@ -2184,9 +2186,9 @@ $(MACHMODE_H) prefix.o: prefix.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) prefix.h \ @@ -47,7 +47,7 @@ -c $(srcdir)/prefix.c $(OUTPUT_OPTION) # Language-independent files. -@@ -2260,9 +2262,9 @@ +@@ -2257,9 +2259,9 @@ dumpvers: dumpvers.c @@ -59,7 +59,7 @@ -DREVISION=$(REVISION_s) \ -DDEVPHASE=$(DEVPHASE_s) -DPKGVERSION=$(PKGVERSION_s) \ -DBUGURL=$(BUGURL_s) -c $(srcdir)/version.c $(OUTPUT_OPTION) -@@ -2801,10 +2803,10 @@ +@@ -2798,10 +2800,10 @@ tree-ssa-alias.h $(TREE_FLOW_H) bversion.h: s-bversion; @true @@ -74,7 +74,7 @@ echo "#define BUILDING_GCC_VERSION (BUILDING_GCC_MAJOR * 1000 + BUILDING_GCC_MINOR)" >> bversion.h $(STAMP) s-bversion -@@ -3805,9 +3807,9 @@ +@@ -3802,9 +3804,9 @@ ## build/version.o is compiled by the $(COMPILER_FOR_BUILD) but needs ## several C macro definitions, just like version.o build/version.o: version.c version.h \ @@ -86,7 +86,7 @@ -DREVISION=$(REVISION_s) \ -DDEVPHASE=$(DEVPHASE_s) -DPKGVERSION=$(PKGVERSION_s) \ -DBUGURL=$(BUGURL_s) -o $@ $< -@@ -3968,7 +3970,7 @@ +@@ -3965,7 +3967,7 @@ cppbuiltin.o: cppbuiltin.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ cppbuiltin.h Makefile $(COMPILER) $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \ @@ -95,7 +95,7 @@ -c $(srcdir)/cppbuiltin.c $(OUTPUT_OPTION) cppdefault.o: cppdefault.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ -@@ -3989,8 +3991,8 @@ +@@ -3986,8 +3988,8 @@ build/gcov-iov.o -o $@ gcov-iov.h: s-iov @@ -106,7 +106,7 @@ > tmp-gcov-iov.h $(SHELL) $(srcdir)/../move-if-change tmp-gcov-iov.h gcov-iov.h $(STAMP) s-iov -@@ -4214,8 +4216,8 @@ +@@ -4211,8 +4213,8 @@ TEXI_CPPINT_FILES = cppinternals.texi gcc-common.texi gcc-vers.texi # gcc-vers.texi is generated from the version files. 
@@ -117,7 +117,7 @@ if [ "$(DEVPHASE_c)" = "experimental" ]; \ then echo "@set DEVELOPMENT"; \ else echo "@clear DEVELOPMENT"; \ -@@ -4582,9 +4584,11 @@ +@@ -4579,9 +4581,11 @@ install-driver: installdirs xgcc$(exeext) -rm -f $(DESTDIR)$(bindir)/$(GCC_INSTALL_NAME)$(exeext) -$(INSTALL_PROGRAM) xgcc$(exeext) $(DESTDIR)$(bindir)/$(GCC_INSTALL_NAME)$(exeext) diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-cloog-dl.diff gcc-4.6-4.6.4/debian/patches/gcc-cloog-dl.diff --- gcc-4.6-4.6.2/debian/patches/gcc-cloog-dl.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-cloog-dl.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,7 +1,7 @@ # DP: Link against -ldl instead of -lcloog -lppl. Exit with an error when using # DP: the Graphite loop transformation infrastructure without having the -# DP: libcloog-ppl0 package installed. Packages using these optimizations -# DP: should build-depend on libcloog-ppl0. +# DP: libcloog-ppl[01] package installed. Packages using these optimizations +# DP: should build-depend on libcloog-ppl1 | libcloog-ppl0. 2011-01-04 Jakub Jelinek @@ -18,9 +18,11 @@ stmt_for argument to stmt_fora. * graphite-poly.h: Include graphite-cloog-util.h. ---- a/src/gcc/Makefile.in.jj 2011-01-03 13:44:14.163900902 +0100 -+++ b/src/gcc/Makefile.in 2011-01-04 17:48:53.588775911 +0100 -@@ -984,6 +984,8 @@ GCC_PLUGIN_H = gcc-plugin.h highlev-plug +Index: b/src/gcc/Makefile.in +=================================================================== +--- a/src/gcc/Makefile.in ++++ b/src/gcc/Makefile.in +@@ -999,6 +999,8 @@ PLUGIN_H = plugin.h $(GCC_PLUGIN_H) PLUGIN_VERSION_H = plugin-version.h configargs.h LIBFUNCS_H = libfuncs.h $(HASHTAB_H) @@ -29,7 +31,7 @@ # # Now figure out from those variables how to compile and link. -@@ -1037,7 +1039,7 @@ BUILD_LIBDEPS= $(BUILD_LIBIBERTY) +@@ -1052,7 +1054,7 @@ # and the system's installed libraries. 
LIBS = @LIBS@ $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBIBERTY) $(LIBDECNUMBER) \ $(HOST_LIBS) @@ -38,7 +40,7 @@ $(ZLIB) # Any system libraries needed just for GNAT. SYSLIBS = @GNAT_LIBEXC@ -@@ -2668,40 +2670,40 @@ sese.o : sese.c sese.h $(CONFIG_H) $(SYS +@@ -2684,40 +2686,40 @@ $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) tree-pass.h value-prof.h graphite.o : graphite.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DIAGNOSTIC_CORE_H) \ $(TREE_FLOW_H) $(TREE_DUMP_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) sese.h \ @@ -93,7 +95,7 @@ graphite-sese-to-poly.h tree-vect-loop.o: tree-vect-loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \ -@@ -3482,6 +3484,11 @@ $(out_object_file): $(out_file) $(CONFIG +@@ -3499,6 +3501,11 @@ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \ $(out_file) $(OUTPUT_OPTION) @@ -105,9 +107,11 @@ # Build auxiliary files that support ecoff format. mips-tfile: mips-tfile.o version.o $(LIBDEPS) $(LINKER) $(LINKERFLAGS) $(LDFLAGS) -o $@ \ ---- a/src/gcc/graphite-cloog-compat.h.jj 2011-01-03 12:53:05.000000000 +0100 -+++ b/src/gcc/graphite-cloog-compat.h 2011-01-04 17:34:09.857757544 +0100 -@@ -272,4 +272,277 @@ static inline int cloog_matrix_nrows (Cl +Index: b/src/gcc/graphite-cloog-compat.h +=================================================================== +--- a/src/gcc/graphite-cloog-compat.h ++++ b/src/gcc/graphite-cloog-compat.h +@@ -272,4 +272,277 @@ return m->NbRows; } #endif /* CLOOG_ORG */ @@ -385,9 +389,11 @@ + + #endif /* GRAPHITE_CLOOG_COMPAT_H */ ---- a/src/gcc/graphite.c.jj 2011-01-03 12:53:05.194056513 +0100 -+++ b/src/gcc/graphite.c 2011-01-04 16:18:32.385007767 +0100 -@@ -56,6 +56,35 @@ along with GCC; see the file COPYING3. 
+Index: b/src/gcc/graphite.c +=================================================================== +--- a/src/gcc/graphite.c ++++ b/src/gcc/graphite.c +@@ -56,6 +56,37 @@ CloogState *cloog_state; @@ -400,7 +406,9 @@ + + if (cloog_pointers__.inited) + return cloog_pointers__.h != NULL; -+ h = dlopen ("libcloog-ppl.so.0", RTLD_LAZY); ++ h = dlopen ("libcloog-ppl.so.1", RTLD_LAZY); ++ if (!h) ++ h = dlopen ("libcloog-ppl.so.0", RTLD_LAZY); + cloog_pointers__.h = h; + if (h == NULL) + return false; @@ -423,22 +431,24 @@ /* Print global statistics to FILE. */ static void -@@ -201,6 +230,12 @@ graphite_initialize (void) +@@ -201,6 +232,12 @@ return false; } + if (!init_cloog_pointers ()) + { -+ sorry ("Graphite loop optimizations can only be used if the libcloog-ppl0 package is installed"); ++ sorry ("Graphite loop optimizations can only be used if the libcloog-ppl1 or libcloog-ppl0 package is installed"); + return false; + } + scev_reset (); recompute_all_dominators (); initialize_original_copy_tables (); ---- a/src/gcc/graphite-clast-to-gimple.c.jj 2011-01-03 12:53:05.000000000 +0100 -+++ b/src/gcc/graphite-clast-to-gimple.c 2011-01-04 16:29:55.738007463 +0100 -@@ -738,10 +738,10 @@ clast_get_body_of_loop (struct clast_stm +Index: b/src/gcc/graphite-clast-to-gimple.c +=================================================================== +--- a/src/gcc/graphite-clast-to-gimple.c ++++ b/src/gcc/graphite-clast-to-gimple.c +@@ -738,10 +738,10 @@ from STMT_FOR. */ static tree @@ -451,9 +461,11 @@ struct clast_user_stmt *body = clast_get_body_of_loop (stmt); CloogStatement *cs = body->statement; poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs); ---- a/src/gcc/graphite-poly.h.jj 2011-01-03 12:53:05.000000000 +0100 -+++ b/src/gcc/graphite-poly.h 2011-01-04 17:35:53.308788629 +0100 -@@ -22,6 +22,8 @@ along with GCC; see the file COPYING3. 
+Index: b/src/gcc/graphite-poly.h +=================================================================== +--- a/src/gcc/graphite-poly.h ++++ b/src/gcc/graphite-poly.h +@@ -22,6 +22,8 @@ #ifndef GCC_GRAPHITE_POLY_H #define GCC_GRAPHITE_POLY_H diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-d-lang.diff gcc-4.6-4.6.4/debian/patches/gcc-d-lang.diff --- gcc-4.6-4.6.2/debian/patches/gcc-d-lang.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-d-lang.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,7 +1,7 @@ # DP: Add D options and specs for the gcc driver. ---- /dev/null 2011-07-23 10:07:44.175344374 +0100 -+++ b/src/gcc/d/lang-specs.h 2011-07-09 23:51:52.807002983 +0100 +--- /dev/null ++++ b/src/gcc/d/lang-specs.h @@ -0,0 +1,53 @@ +/* GDC -- D front-end for GCC + Copyright (C) 2004 David Friedman @@ -56,8 +56,8 @@ + %{M} %{MM} %{!fsyntax-only:%(invoke_as)}}", D_D_SPEC, 1, 0 }, +#endif + ---- /dev/null 2011-07-23 10:07:44.175344374 +0100 -+++ b/src/gcc/d/lang.opt 2011-07-24 15:48:56.848040870 +0100 +--- /dev/null ++++ b/src/gcc/d/lang.opt @@ -0,0 +1,215 @@ +; GDC -- D front-end for GCC +; Copyright (C) 2004 David Friedman @@ -274,9 +274,9 @@ + +static_libphobos +Driver ---- a/src/gcc/gcc.c 2011-02-23 02:04:43.000000000 +0000 -+++ b/src/gcc/gcc.c 2011-07-12 21:55:05.805144355 +0100 -@@ -373,6 +373,7 @@ or with constant text in a single argume +--- a/src/gcc/gcc.c ++++ b/src/gcc/gcc.c +@@ -373,6 +373,7 @@ assembler has done its job. %D Dump out a -L option for each directory in startfile_prefixes. If multilib_dir is set, extra entries are generated with it affixed. @@ -284,7 +284,7 @@ %l process LINK_SPEC as a spec. %L process LIB_SPEC as a spec. %G process LIBGCC_SPEC as a spec. 
-@@ -5095,6 +5096,17 @@ do_spec_1 (const char *spec, int inswitc +@@ -5095,6 +5096,17 @@ return value; break; diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-gengtype-fix1.diff gcc-4.6-4.6.4/debian/patches/gcc-gengtype-fix1.diff --- gcc-4.6-4.6.2/debian/patches/gcc-gengtype-fix1.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-gengtype-fix1.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,46 @@ +# DP: Backport patch from the 4.7 branch, fixing gengtype memory issue. + +gcc/ + +2011-04-21 Dimitrios Apostolou + Jeff Law + + * gengtype-state.c (read_a_state_token): Fix argument to + obstack_free. + * gengtype.c (matching_file_name_substitute): Likewise. + +Index: gcc/gengtype-state.c +=================================================================== +--- a/src/gcc/gengtype-state.c (revision 172831) ++++ b/src/gcc/gengtype-state.c (revision 172832) +@@ -303,7 +303,7 @@ + obstack_1grow (&id_obstack, (char) 0); + ids = XOBFINISH (&id_obstack, char *); + sid = state_ident_by_name (ids, INSERT); +- obstack_free (&id_obstack, ids); ++ obstack_free (&id_obstack, NULL); + ids = NULL; + tk = XCNEW (struct state_token_st); + tk->stok_kind = STOK_NAME; +@@ -408,7 +408,7 @@ + tk->stok_file = state_path; + tk->stok_next = NULL; + strcpy (tk->stok_un.stok_string, cstr); +- obstack_free (&bstring_obstack, cstr); ++ obstack_free (&bstring_obstack, NULL); + + return tk; + } +Index: gcc/gengtype.c +=================================================================== +--- a/src/gcc/gengtype.c (revision 172831) ++++ b/src/gcc/gengtype.c (revision 172832) +@@ -1943,7 +1943,7 @@ + obstack_1grow (&str_obstack, '\0'); + rawstr = XOBFINISH (&str_obstack, char *); + str = xstrdup (rawstr); +- obstack_free (&str_obstack, rawstr); ++ obstack_free (&str_obstack, NULL); + DBGPRINTF ("matched replacement %s", str); + rawstr = NULL; + return str; diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-gengtype-fix2.diff gcc-4.6-4.6.4/debian/patches/gcc-gengtype-fix2.diff --- 
gcc-4.6-4.6.2/debian/patches/gcc-gengtype-fix2.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-gengtype-fix2.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,16 @@ +# DP: Write gengtype output to a temporary file before using it + +--- a/src/gcc/Makefile.in ++++ b/src/gcc/Makefile.in +@@ -3780,9 +3780,10 @@ + gtyp-input.list + # First, parse all files and save a state file. + $(RUN_GEN) build/gengtype$(build_exeext) $(GENGTYPE_FLAGS) \ +- -S $(srcdir) -I gtyp-input.list -w gtype.state ++ -S $(srcdir) -I gtyp-input.list -w tmp-gtype.state + # Second, read the state file and generate all files. This ensure that + # gtype.state is correctly read: ++ $(SHELL) $(srcdir)/../move-if-change tmp-gtype.state gtype.state + $(RUN_GEN) build/gengtype$(build_exeext) $(GENGTYPE_FLAGS) \ + -r gtype.state + $(STAMP) s-gtype diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-hash-style-both.diff gcc-4.6-4.6.4/debian/patches/gcc-hash-style-both.diff --- gcc-4.6-4.6.2/debian/patches/gcc-hash-style-both.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-hash-style-both.diff 2013-04-14 23:00:34.000000000 +0000 @@ -31,7 +31,7 @@ --- a/src/gcc/config/alpha/linux-elf.h +++ b/src/gcc/config/alpha/linux-elf.h -@@ -39,7 +39,7 @@ along with GCC; see the file COPYING3. If not see +@@ -41,7 +41,7 @@ #define ELF_DYNAMIC_LINKER LINUX_DYNAMIC_LINKER @@ -42,19 +42,19 @@ %{!shared: \ --- a/src/gcc/config/i386/linux.h +++ b/src/gcc/config/i386/linux.h -@@ -113,7 +113,7 @@ along with GCC; see the file COPYING3. 
If not see +@@ -104,7 +104,7 @@ { "dynamic_linker", LINUX_DYNAMIC_LINKER } #undef LINK_SPEC -#define LINK_SPEC "-m %(link_emulation) %{shared:-shared} \ +#define LINK_SPEC "-m %(link_emulation) --hash-style=both %{shared:-shared} \ %{!shared: \ - %{!ibcs: \ - %{!static: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ --- a/src/gcc/config/i386/linux64.h +++ b/src/gcc/config/i386/linux64.h -@@ -70,7 +70,7 @@ along with GCC; see the file COPYING3. If not see - #endif +@@ -78,7 +78,7 @@ + %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}" #undef LINK_SPEC -#define LINK_SPEC "%{" SPEC_64 ":-m elf_x86_64} %{" SPEC_32 ":-m elf_i386} \ @@ -64,7 +64,7 @@ %{!static: \ --- a/src/gcc/config/ia64/linux.h +++ b/src/gcc/config/ia64/linux.h -@@ -40,7 +40,7 @@ do { \ +@@ -64,7 +64,7 @@ #define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-ia64.so.2" #undef LINK_SPEC @@ -75,7 +75,7 @@ %{!static: \ --- a/src/gcc/config/rs6000/linux64.h +++ b/src/gcc/config/rs6000/linux64.h -@@ -386,11 +386,11 @@ +@@ -389,11 +389,11 @@ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64) @@ -91,18 +91,18 @@ --- a/src/gcc/config/rs6000/sysv4.h +++ b/src/gcc/config/rs6000/sysv4.h -@@ -906,7 +906,7 @@ SVR4_ASM_SPEC \ +@@ -830,7 +830,7 @@ #define LINUX_DYNAMIC_LINKER \ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER) -#define LINK_OS_LINUX_SPEC "-m elf32ppclinux %{!shared: %{!static: \ +#define LINK_OS_LINUX_SPEC "-m elf32ppclinux --hash-style=both %{!shared: %{!static: \ %{rdynamic:-export-dynamic} \ - %{!dynamic-linker:-dynamic-linker " LINUX_DYNAMIC_LINKER "}}}" + -dynamic-linker " LINUX_DYNAMIC_LINKER "}}" --- a/src/gcc/config/s390/linux.h +++ b/src/gcc/config/s390/linux.h -@@ -77,7 +77,7 @@ along with GCC; see the file COPYING3. 
If not see +@@ -77,7 +77,7 @@ #undef LINK_SPEC #define LINK_SPEC \ @@ -113,7 +113,7 @@ %{static:-static} \ --- a/src/gcc/config/sparc/linux.h +++ b/src/gcc/config/sparc/linux.h -@@ -86,7 +86,7 @@ along with GCC; see the file COPYING3. If not see +@@ -74,7 +74,7 @@ #define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" #undef LINK_SPEC @@ -121,12 +121,12 @@ +#define LINK_SPEC "-m elf32_sparc --hash-style=both -Y P,/usr/lib %{shared:-shared} \ %{!mno-relax:%{!r:-relax}} \ %{!shared: \ - %{!ibcs: \ ---- a/src/gcc/config/arm/linux-elf.h~ 2009-02-20 16:20:38.000000000 +0100 -+++ b/src/gcc/config/arm/linux-elf.h 2009-12-21 13:19:36.000000000 +0100 -@@ -72,6 +72,7 @@ + %{!static: \ +--- a/src/gcc/config/arm/linux-elf.h ++++ b/src/gcc/config/arm/linux-elf.h +@@ -71,6 +71,7 @@ %{rdynamic:-export-dynamic} \ - %{!dynamic-linker:-dynamic-linker " LINUX_DYNAMIC_LINKER "} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER " \ -X \ + --hash-style=both \ %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-ice-apport.diff gcc-4.6-4.6.4/debian/patches/gcc-ice-apport.diff --- gcc-4.6-4.6.2/debian/patches/gcc-ice-apport.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-ice-apport.diff 2013-04-14 23:00:34.000000000 +0000 @@ -3,7 +3,7 @@ --- a/src/gcc/gcc.c +++ b/src/gcc/gcc.c -@@ -6102,6 +6102,16 @@ +@@ -6106,6 +6106,16 @@ fnotice (stderr, "Preprocessed source stored into %s file," " please attach this to your bugreport.\n", temp_filenames[attempt * 2]); diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-ice-hack.diff gcc-4.6-4.6.4/debian/patches/gcc-ice-hack.diff --- gcc-4.6-4.6.2/debian/patches/gcc-ice-hack.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-ice-hack.diff 2013-04-14 23:00:34.000000000 +0000 @@ -34,7 +34,7 @@ static const char *getenv_spec_function (int, const char **); static const char *if_exists_spec_function (int, const char **); -@@ -2638,7 +2643,7 @@ +@@ -2639,7 +2642,7 @@ } } @@ -43,7 +43,7 @@ 
free (CONST_CAST (char *, string)); } -@@ -2691,6 +2696,16 @@ +@@ -2692,6 +2695,16 @@ else if (WIFEXITED (status) && WEXITSTATUS (status) >= MIN_FATAL_STATUS) { @@ -60,7 +60,7 @@ if (WEXITSTATUS (status) > greatest_status) greatest_status = WEXITSTATUS (status); ret_code = -1; -@@ -2748,6 +2763,9 @@ +@@ -2749,6 +2762,9 @@ } } @@ -70,7 +70,7 @@ return ret_code; } } -@@ -5874,6 +5892,227 @@ +@@ -5886,6 +5902,227 @@ switches[switchnum].validated = 1; } diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-linaro-doc.diff gcc-4.6-4.6.4/debian/patches/gcc-linaro-doc.diff --- gcc-4.6-4.6.2/debian/patches/gcc-linaro-doc.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-linaro-doc.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,8 +1,8 @@ -# DP: Changes for the Linaro 4.6-2011.12 release (documentation). +# DP: Changes for the Linaro 4.6-2013.04 release (documentation). --- a/src/gcc/doc/invoke.texi +++ b/src/gcc/doc/invoke.texi -@@ -8732,6 +8732,10 @@ +@@ -8728,6 +8728,10 @@ The maximum number of best instructions in the ready list that are considered for renaming in the selective scheduler. The default value is 2. @@ -13,7 +13,7 @@ @item max-last-value-rtl The maximum size measured as number of RTLs that can be recorded in an expression in combiner for a pseudo register as last known value of that register. The default -@@ -8911,6 +8915,11 @@ +@@ -8907,6 +8911,11 @@ The maximum number of namespaces to consult for suggestions when C++ name lookup fails for an identifier. The default is 1000. 
@@ -25,12 +25,14 @@ @end table @end table -@@ -10201,11 +10210,22 @@ +@@ -10196,12 +10205,23 @@ + @samp{arm10e}, @samp{arm1020e}, @samp{arm1022e}, @samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp}, @samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s}, - @samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15}, +-@samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15}, -@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3}, -+@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, ++@samp{cortex-a5}, @samp{cortex-a7}, @samp{cortex-a8}, @samp{cortex-a9}, ++@samp{cortex-a15}, @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, +@samp{cortex-m4}, @samp{cortex-m3}, @samp{cortex-m1}, @samp{cortex-m0}, @@ -49,7 +51,7 @@ @item -mtune=@var{name} @opindex mtune This option is very similar to the @option{-mcpu=} option, except that -@@ -10217,6 +10237,18 @@ +@@ -10213,6 +10233,18 @@ For some ARM implementations better performance can be obtained by using this option. @@ -68,7 +70,7 @@ @item -march=@var{name} @opindex march This specifies the name of the target ARM architecture. GCC uses this -@@ -10230,6 +10262,11 @@ +@@ -10226,6 +10258,11 @@ @samp{armv7}, @samp{armv7-a}, @samp{armv7-r}, @samp{armv7-m}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}. @@ -175,7 +177,7 @@ Further canonicalization rules are defined in the function --- a/src/gcc/doc/tm.texi +++ b/src/gcc/doc/tm.texi -@@ -2533,7 +2533,7 @@ +@@ -2541,7 +2541,7 @@ register, so @code{TARGET_PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when @var{x} is a floating-point constant. If the constant can't be loaded into any kind of register, code generation will be better if @@ -184,7 +186,7 @@ of using @code{TARGET_PREFERRED_RELOAD_CLASS}. 
If an insn has pseudos in it after register allocation, reload will go -@@ -2570,8 +2570,8 @@ +@@ -2578,8 +2578,8 @@ register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when @var{x} is a floating-point constant. If the constant can't be loaded into any kind of register, code generation will be better if @@ -195,7 +197,7 @@ If an insn has pseudos in it after register allocation, reload will go through the alternatives and call repeatedly @code{PREFERRED_RELOAD_CLASS} -@@ -4319,6 +4319,34 @@ +@@ -4327,6 +4327,34 @@ must have move patterns for this mode. @end deftypefn @@ -230,7 +232,7 @@ @deftypefn {Target Hook} bool TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P (enum machine_mode @var{mode}) Define this to return nonzero for machine modes for which the port has small register classes. If this target hook returns nonzero for a given -@@ -5577,13 +5605,13 @@ +@@ -5585,13 +5613,13 @@ @code{TARGET_MODE_DEPENDENT_ADDRESS_P} target hook. @end defmac @@ -251,3 +253,55 @@ @deftypefn {Target Hook} rtx TARGET_DELEGITIMIZE_ADDRESS (rtx @var{x}) This hook is used to undo the possibly obfuscating effects of the +--- a/src/gcc/doc/tm.texi.in ++++ b/src/gcc/doc/tm.texi.in +@@ -2523,7 +2523,7 @@ + register, so @code{TARGET_PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead + of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go +@@ -2560,8 +2560,8 @@ + register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when + @var{x} is a floating-point constant. 
If the constant can't be loaded + into any kind of register, code generation will be better if +-@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead +-of using @code{PREFERRED_RELOAD_CLASS}. ++@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead ++of using @code{TARGET_PREFERRED_RELOAD_CLASS}. + + If an insn has pseudos in it after register allocation, reload will go + through the alternatives and call repeatedly @code{PREFERRED_RELOAD_CLASS} +@@ -4307,6 +4307,8 @@ + must have move patterns for this mode. + @end deftypefn + ++@hook TARGET_ARRAY_MODE_SUPPORTED_P ++ + @hook TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P + Define this to return nonzero for machine modes for which the port has + small register classes. If this target hook returns nonzero for a given +@@ -5557,13 +5559,13 @@ + @code{TARGET_MODE_DEPENDENT_ADDRESS_P} target hook. + @end defmac + +-@defmac LEGITIMATE_CONSTANT_P (@var{x}) +-A C expression that is nonzero if @var{x} is a legitimate constant for +-an immediate operand on the target machine. You can assume that +-@var{x} satisfies @code{CONSTANT_P}, so you need not check this. In fact, +-@samp{1} is a suitable definition for this macro on machines where +-anything @code{CONSTANT_P} is valid. +-@end defmac ++@hook TARGET_LEGITIMATE_CONSTANT_P ++This hook returns true if @var{x} is a legitimate constant for a ++@var{mode}-mode immediate operand on the target machine. You can assume that ++@var{x} satisfies @code{CONSTANT_P}, so you need not check this. ++ ++The default definition returns true. 
++@end deftypefn + + @hook TARGET_DELEGITIMIZE_ADDRESS + This hook is used to undo the possibly obfuscating effects of the diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-linaro-revert-106905-doc.diff gcc-4.6-4.6.4/debian/patches/gcc-linaro-revert-106905-doc.diff --- gcc-4.6-4.6.2/debian/patches/gcc-linaro-revert-106905-doc.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-linaro-revert-106905-doc.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,28 @@ +--- a/src/gcc/doc/tm.texi 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/doc/tm.texi 2011-07-01 09:19:21 +0000 +@@ -1118,14 +1118,6 @@ + If the value of this macro has a type, it should be an unsigned type. + @end defmac + +-@deftypefn {Target Hook} HOST_WIDE_INT TARGET_VECTOR_ALIGNMENT (const_tree @var{type}) +-This hook can be used to define the alignment for a vector of type +-@var{type}, in order to comply with a platform ABI. The default is to +-require natural alignment for vector types. The alignment returned by +-this hook must be a power-of-two multiple of the default alignment of +-the vector element type. +-@end deftypefn +- + @defmac STACK_SLOT_ALIGNMENT (@var{type}, @var{mode}, @var{basic-align}) + If defined, a C expression to compute the alignment for stack slot. + @var{type} is the data type, @var{mode} is the widest mode available, +--- a/src/gcc/doc/tm.texi.in 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/doc/tm.texi.in 2011-07-01 09:19:21 +0000 +@@ -1108,8 +1108,6 @@ + If the value of this macro has a type, it should be an unsigned type. + @end defmac + +-@hook TARGET_VECTOR_ALIGNMENT +- + @defmac STACK_SLOT_ALIGNMENT (@var{type}, @var{mode}, @var{basic-align}) + If defined, a C expression to compute the alignment for stack slot. 
+ @var{type} is the data type, @var{mode} is the widest mode available, diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-linaro-revert-106905.diff gcc-4.6-4.6.4/debian/patches/gcc-linaro-revert-106905.diff --- gcc-4.6-4.6.2/debian/patches/gcc-linaro-revert-106905.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-linaro-revert-106905.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,290 @@ +--- a/src/gcc/config/arm/arm.c 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/config/arm/arm.c 2012-07-01 01:50:29 +0000 +@@ -258,7 +258,6 @@ + unsigned HOST_WIDE_INT); + static enum machine_mode arm_preferred_simd_mode (enum machine_mode); + static bool arm_class_likely_spilled_p (reg_class_t); +-static HOST_WIDE_INT arm_vector_alignment (const_tree type); + static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); + static bool arm_builtin_support_vector_misalignment (enum machine_mode mode, + const_tree type, +@@ -613,9 +612,6 @@ + #undef TARGET_CLASS_LIKELY_SPILLED_P + #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p + +-#undef TARGET_VECTOR_ALIGNMENT +-#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment +- + #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE + #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + arm_vector_alignment_reachable +@@ -24815,18 +24811,6 @@ + } + } + +-/* The AAPCS sets the maximum alignment of a vector to 64 bits. */ +-static HOST_WIDE_INT +-arm_vector_alignment (const_tree type) +-{ +- HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0); +- +- if (TARGET_AAPCS_BASED) +- align = MIN (align, 64); +- +- return align; +-} +- + static unsigned int + arm_autovectorize_vector_sizes (void) + { +--- a/src/gcc/stor-layout.c 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/stor-layout.c 2012-04-02 11:35:45 +0000 +@@ -1955,17 +1955,9 @@ + TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE (innertype), + bitsize_int (nunits), 0); + +- /* For vector types, we do not default to the mode's alignment. 
+- Instead, query a target hook, defaulting to natural alignment. +- This prevents ABI changes depending on whether or not native +- vector modes are supported. */ +- TYPE_ALIGN (type) = targetm.vector_alignment (type); +- +- /* However, if the underlying mode requires a bigger alignment than +- what the target hook provides, we cannot use the mode. For now, +- simply reject that case. */ +- gcc_assert (TYPE_ALIGN (type) +- >= GET_MODE_ALIGNMENT (TYPE_MODE (type))); ++ /* Always naturally align vectors. This prevents ABI changes ++ depending on whether or not native vector modes are supported. */ ++ TYPE_ALIGN (type) = tree_low_cst (TYPE_SIZE (type), 0); + break; + } + +--- a/src/gcc/target.def 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/target.def 2011-07-01 09:19:21 +0000 +@@ -1618,16 +1618,6 @@ + bool, (enum machine_mode mode), + hook_bool_mode_false) + +-DEFHOOK +-(vector_alignment, +- "This hook can be used to define the alignment for a vector of type\n\ +-@var{type}, in order to comply with a platform ABI. The default is to\n\ +-require natural alignment for vector types. The alignment returned by\n\ +-this hook must be a power-of-two multiple of the default alignment of\n\ +-the vector element type.", +- HOST_WIDE_INT, (const_tree type), +- default_vector_alignment) +- + /* True if we should try to use a scalar mode to represent an array, + overriding the usual MAX_FIXED_MODE limit. */ + DEFHOOK +--- a/src/gcc/targhooks.c 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/targhooks.c 2012-04-02 11:35:45 +0000 +@@ -979,13 +979,6 @@ + return id; + } + +-/* Default to natural alignment for vector types. 
*/ +-HOST_WIDE_INT +-default_vector_alignment (const_tree type) +-{ +- return tree_low_cst (TYPE_SIZE (type), 0); +-} +- + bool + default_builtin_vector_alignment_reachable (const_tree type, bool is_packed) + { +--- a/src/gcc/targhooks.h 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/targhooks.h 2011-05-03 15:17:25 +0000 +@@ -85,8 +85,6 @@ + + extern tree default_builtin_reciprocal (unsigned int, bool, bool); + +-extern HOST_WIDE_INT default_vector_alignment (const_tree); +- + extern bool default_builtin_vector_alignment_reachable (const_tree, bool); + extern bool + default_builtin_support_vector_misalignment (enum machine_mode mode, +--- a/src/gcc/testsuite/gcc.dg/align-2.c 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/testsuite/gcc.dg/align-2.c 2004-10-19 18:21:41 +0000 +@@ -1,5 +1,5 @@ + /* PR 17962 */ +-/* { dg-do compile { target vect_natural_alignment } } */ ++/* { dg-do compile } */ + /* { dg-options "" } */ + + typedef float v4 __attribute__((vector_size(sizeof(float)*4))); +--- a/src/gcc/testsuite/gcc.dg/vect/slp-25.c 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/testsuite/gcc.dg/vect/slp-25.c 2011-09-19 07:44:24 +0000 +@@ -56,5 +56,5 @@ + + /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align || { ! 
vect_natural_alignment } } } } } */ ++/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2012-08-09 14:38:28 +0000 ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-peel-1.c 2011-09-19 07:44:24 +0000 +@@ -48,6 +48,6 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */ ++/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_element_align } } } */ + /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2012-08-09 14:38:28 +0000 ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-peel-2.c 2011-09-19 07:44:24 +0000 +@@ -49,6 +49,6 @@ + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */ +-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */ ++/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_element_align } } } */ ++/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target vect_element_align } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-peel-3.c 2012-08-09 14:38:28 +0000 ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-peel-3.c 2010-11-22 13:59:45 +0000 +@@ -4,7 +4,9 @@ + #include "tree-vect.h" + + #define N 128 +-#define RES 21640 
++#define RES 21888 ++ ++/* unaligned store. */ + + int ib[N+10]; + int ia[N+10]; +@@ -16,11 +18,11 @@ + int i, suma = 0, sumb = 0, sumc = 0; + + /* ib and ic have same misalignment, we peel to align them. */ +- for (i = 0; i <= N; i++) ++ for (i = 1; i <= N; i++) + { + suma += ia[i]; +- sumb += ib[i+5]; +- sumc += ic[i+1]; ++ sumb += ib[i+6]; ++ sumc += ic[i+2]; + } + + /* check results: */ +@@ -47,7 +49,7 @@ + return main1 (); + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */ +-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ ++/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2012-08-09 14:38:28 +0000 ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-peel-4.c 2011-09-19 07:44:24 +0000 +@@ -16,13 +16,13 @@ + /* Don't peel keeping one load and the store aligned. 
*/ + for (i = 0; i <= N; i++) + { +- ia[i] = ib[i] + ib[i+5]; ++ ia[i] = ib[i] + ib[i+6]; + } + + /* check results: */ + for (i = 1; i <= N; i++) + { +- if (ia[i] != ib[i] + ib[i+5]) ++ if (ia[i] != ib[i] + ib[i+6]) + abort (); + } + +@@ -44,7 +44,7 @@ + return main1 (); + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */ + /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/lib/target-supports.exp 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/testsuite/lib/target-supports.exp 2012-03-02 13:53:14 +0000 +@@ -3117,26 +3117,6 @@ + return $et_natural_alignment_64_saved + } + +-# Return 1 if all vector types are naturally aligned (aligned to their +-# type-size), 0 otherwise. +-# +-# This won't change for different subtargets so cache the result. +- +-proc check_effective_target_vect_natural_alignment { } { +- global et_vect_natural_alignment +- +- if [info exists et_vect_natural_alignment_saved] { +- verbose "check_effective_target_vect_natural_alignment: using cached result" 2 +- } else { +- set et_vect_natural_alignment_saved 1 +- if { [check_effective_target_arm_eabi] } { +- set et_vect_natural_alignment_saved 0 +- } +- } +- verbose "check_effective_target_vect_natural_alignment: returning $et_vect_natural_alignment_saved" 2 +- return $et_vect_natural_alignment_saved +-} +- + # Return 1 if vector alignment (for types of size 32 bit or less) is reachable, 0 otherwise. + # + # This won't change for different subtargets so cache the result. 
+--- a/src/gcc/tree-vect-data-refs.c 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/tree-vect-data-refs.c 2012-06-26 09:29:30 +0000 +@@ -1041,7 +1041,7 @@ + int misal = DR_MISALIGNMENT (dr); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + misal += negative ? -npeel * dr_size : npeel * dr_size; +- misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1; ++ misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1; + SET_DR_MISALIGNMENT (dr, misal); + return; + } +--- a/src/gcc/tree-vect-loop-manip.c 2012-08-07 18:13:10 +0000 ++++ b/src/gcc/tree-vect-loop-manip.c 2011-07-04 11:13:51 +0000 +@@ -1972,7 +1972,7 @@ + If the misalignment of DR is known at compile time: + addr_mis = int mis = DR_MISALIGNMENT (dr); + Else, compute address misalignment in bytes: +- addr_mis = addr & (vectype_align - 1) ++ addr_mis = addr & (vectype_size - 1) + + prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step) + +@@ -2029,10 +2029,9 @@ + tree ptr_type = TREE_TYPE (start_addr); + tree size = TYPE_SIZE (ptr_type); + tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); +- tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1); +- HOST_WIDE_INT elem_size = +- int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); +- tree elem_size_log = build_int_cst (type, exact_log2 (elem_size)); ++ tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); ++ tree elem_size_log = ++ build_int_cst (type, exact_log2 (vectype_align/nelements)); + tree nelements_minus_1 = build_int_cst (type, nelements - 1); + tree nelements_tree = build_int_cst (type, nelements); + tree byte_misalign; +@@ -2041,10 +2040,10 @@ + new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts); + gcc_assert (!new_bb); + +- /* Create: byte_misalign = addr & (vectype_align - 1) */ ++ /* Create: byte_misalign = addr & (vectype_size - 1) */ + byte_misalign = + fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), +- vectype_align_minus_1); ++ vectype_size_minus_1); + + /* 
Create: elem_misalign = byte_misalign / element_size */ + elem_misalign = diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-linaro.diff gcc-4.6-4.6.4/debian/patches/gcc-linaro.diff --- gcc-4.6-4.6.2/debian/patches/gcc-linaro.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-linaro.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,24 +1,897 @@ -# DP: Changes for the Linaro 4.6-2011.12 release. +# DP: Changes for the Linaro 4.6-2013.04 release. ---- a/src/ChangeLog -+++ b/src/ChangeLog -@@ -1,3 +1,13 @@ -+2011-11-20 Andreas Tobler +--- a/src/ChangeLog.linaro ++++ b/src/ChangeLog.linaro +@@ -0,0 +1,3956 @@ ++2013-04-08 Christophe Lyon + -+ * libtool.m4: Additional FreeBSD 10 fixes. ++ GCC Linaro 4.6-2013.04 released. + -+2011-11-18 Iain Sandoe ++ gcc/ ++ * LINARO-VERSION: Update. + -+ PR target/49992 -+ * configure.ac: Remove ranlib special-casing for Darwin. -+ * configure: Regenerate. ++2013-04-05 Matthew Gretton-Dann ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 197511) ++ ++2013-04-04 Matthew Gretton-Dann ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 197470) ++ ++2013-04-02 Matthew Gretton-Dann ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 197313) ++ ++2013-03-11 Matthew Gretton-Dann ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2013-03-11 Matthew Gretton-Dann ++ ++ GCC Linaro 4.6-2013.03 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2013-02-25 Matthew Gretton-Dann ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 196247) ++ ++2013-02-08 Christophe Lyon ++ ++ gcc/ ++ * LINARO: Bump version. ++ ++2013-02-08 Christophe Lyon ++ ++ GCC Linaro 4.6-2013.02 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2013-02-05 Yvan Roux ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 195744). ++ ++2013-01-15 Zhenqiang Chen ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2013-01-15 Zhenqiang Chen ++ ++ GCC Linaro 4.6-2013.01 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. 
++ ++2013-01-02 Matthew Gretton-Dann ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 194771). ++ ++2012-12-12 Yvan Roux ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-12-12 Yvan Roux ++ ++ GCC Linaro 4.6-2012.12 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-12-10 Yvan Roux ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 194340). ++ ++2012-11-13 Matthew Gretton-Dann ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-11-12 Matthew Gretton-Dann ++ ++ GCC Linaro 4.6-2012.11 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-11-07 Michael Hope ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 193199). ++ ++2012-10-08 Matthew Gretton-Dann ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-10-08 Matthew Gretton-Dann ++ ++ GCC Linaro 4.6-2012.10 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-10-01 Matthew Gretton-Dann ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 191880) ++ ++2012-09-18 Matthew Gretton-Dann ++ ++ LP 1029454 ++ Backport from mainline r183524: ++ ++ gcc/ ++ 2012-01-25 Jakub Jelinek ++ ++ PR tree-optimization/51987 ++ * tree-data-ref.c (get_references_in_stmt): Handle references in ++ non-volatile GIMPLE_ASM. ++ ++ gcc/testsuite/ ++ 2012-01-25 Jakub Jelinek ++ ++ PR tree-optimization/51987 ++ * gcc.target/i386/pr51987.c: New test. ++ ++2012-09-12 Michael Hope ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-09-12 Michael Hope ++ ++ GCC Linaro 4.6-2012.09 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-09-11 Michael Hope ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 191000). ++ ++2012-08-13 Matthew Gretton-Dann ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-08-13 Matthew Gretton-Dann ++ ++ GCC Linaro 4.6-2012.08 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. 
++ ++2012-08-10 Ulrich Weigand ++ ++ Backport from mainline: ++ ++ gcc/ ++ 2012-07-30 Ulrich Weigand ++ Richard Earnshaw ++ ++ * target.def (vector_alignment): New target hook. ++ * doc/tm.texi.in (TARGET_VECTOR_ALIGNMENT): Document new hook. ++ * doc/tm.texi: Regenerate. ++ * targhooks.c (default_vector_alignment): New function. ++ * targhooks.h (default_vector_alignment): Add prototype. ++ * stor-layout.c (layout_type): Use targetm.vector_alignment. ++ * config/arm/arm.c (arm_vector_alignment): New function. ++ (TARGET_VECTOR_ALIGNMENT): Define. ++ ++ * tree-vect-data-refs.c (vect_update_misalignment_for_peel): Use ++ vector type alignment instead of size. ++ * tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound): Use ++ element type size directly instead of computing it from alignment. ++ Fix variable naming and comment. ++ ++ gcc/testsuite/ ++ 2012-07-30 Ulrich Weigand ++ ++ * lib/target-supports.exp ++ (check_effective_target_vect_natural_alignment): New function. ++ * gcc.dg/align-2.c: Only run on targets with natural alignment ++ of vector types. ++ * gcc.dg/vect/slp-25.c: Adjust tests for targets without natural ++ alignment of vector types. ++ ++ 2011-12-21 Michael Zolotukhin ++ ++ * gcc.dg/vect/vect-peel-1.c: Adjust test diag-scans to fix fail on AVX. ++ * gcc.dg/vect/vect-peel-2.c: Ditto. ++ ++ 2011-06-21 Ira Rosen ++ ++ PR testsuite/49443 ++ * gcc.dg/vect/vect-peel-3.c: Expect to fail on vect_no_align ++ targets. ++ * gcc.dg/vect/vect-peel-4.c: Likewise. ++ ++ 2011-06-14 Ira Rosen ++ ++ * gcc.dg/vect/vect-peel-3.c: Adjust misalignment values ++ for double-word vectors. ++ * gcc.dg/vect/vect-peel-4.c: Likewise. ++ ++2012-08-01 Michael Hope ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 189991). ++ ++2012-07-02 Ramana Radhakrishnan ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-07-02 Ramana Radhakrishnan ++ ++ GCC Linaro 4.6-2012.07 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. 
++ ++2012-07-01 Michael Hope ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 189058). ++ ++2012-06-29 Ulrich Weigand ++ ++ LP 1010826 ++ ++ Backport from mainline: ++ ++ gcc/ ++ PR tree-optimization/53729 ++ PR tree-optimization/53636 ++ * tree-vect-slp.c (vect_slp_analyze_bb_1): Delay call to ++ vect_verify_datarefs_alignment until after statements have ++ been marked as relevant/irrelevant. ++ * tree-vect-data-refs.c (vect_verify_datarefs_alignment): ++ Skip irrelevant statements. ++ (vect_enhance_data_refs_alignment): Use STMT_VINFO_RELEVANT_P ++ instead of STMT_VINFO_RELEVANT. ++ (vect_get_data_access_cost): Do not check for supportable ++ alignment before calling vect_get_load_cost/vect_get_store_cost. ++ * tree-vect-stmts.c (vect_get_store_cost): Do not abort when ++ handling unsupported alignment. ++ (vect_get_load_cost): Likewise. ++ ++ gcc/ ++ PR tree-optimization/53636 ++ * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Verify ++ stride when doing basic-block vectorization. ++ ++ gcc/testsuite/ ++ PR tree-optimization/53636 ++ * gcc.target/arm/pr53636.c: New test. ++ ++2012-06-28 Ramana Radhakrishnan ++ ++ gcc/ ++ * config/arm/arm.c (neon_dereference_pointer): Fixup typos. ++ ++ gcc/testsuite/ ++ * gcc.target/arm/lp101329.c: Remove unneeded comment. ++ ++2012-06-21 Ramana Radhakrishnan ++ ++ LP 1013209 ++ gcc/ ++ * config/arm/arm.c (neon_dereference_pointer): Use fold_convert ++ to convert expression to the right type. This is a Linaro 4.6 only ++ patch as this was originally backported from FSF 4.7 and hence applies ++ only here. ++ ++ gcc/testsuite/ ++ * gcc.target/arm/lp101329.c: New test. ++ ++2012-06-14 Michael Hope ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-06-12 Michael Hope ++ ++ GCC Linaro 4.6-2012.06 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-06-11 Michael Hope ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 188320). 
++ ++2012-05-22 Ramana Radhakrishnan ++ ++ Backport from mainline: ++ gcc/ ++ 2012-03-15 Ramana Radhakrishnan ++ ++ * config.gcc (target_type_format_char): New. Document it. Set it for ++ arm*-*-* . ++ ++2012-05-23 Ramana Radhakrishnan ++ ++ LP:990530 ++ gcc/ ++ 2012-03-12 Richard Guenther ++ * config/arm/arm.c (neon_dereference_pointer): Do not call ++ covert during RTL expansion. ++ ++2012-05-21 Michael Hope ++ ++ Backport from mainline r186859: ++ ++ gcc/ ++ 2012-04-26 Michael Hope ++ Richard Earnshaw ++ ++ * config/arm/linux-eabi.h (GLIBC_DYNAMIC_LINKER_SOFT_FLOAT): Define. ++ (GLIBC_DYNAMIC_LINKER_HARD_FLOAT): Define. ++ (GLIBC_DYNAMIC_LINKER_DEFAULT): Define. ++ (GLIBC_DYNAMIC_LINKER): Redefine to use the hard float path. ++ ++ Backport from mainline r187012: ++ ++ gcc/ ++ 2012-05-01 Richard Earnshaw ++ ++ * arm/linux-eabi.h (GLIBC_DYNAMIC_LINKER_DEFAULT): Avoid ifdef ++ comparing enumeration values. Update comments. ++ ++2012-05-15 Andrew Stubbs ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-05-15 Andrew Stubbs ++ ++ GCC Linaro 4.6-2012.05 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-05-08 Andrew Stubbs ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 187273). ++ ++2012-05-08 Ulrich Weigand ++ ++ LP 959242 ++ ++ Backport from mainline: ++ ++ 2012-05-04 Ulrich Weigand ++ ++ gcc/ ++ PR tree-optimization/52633 ++ * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Swap order of ++ vect_recog_widen_shift_pattern and vect_recog_over_widening_pattern. ++ (vect_recog_over_widening_pattern): Remove handling of code that was ++ already detected as over-widening pattern. Remove special handling ++ of "unsigned" cases. Instead, support general case of conversion ++ of the shift result to another type. ++ ++ gcc/testsuite/ ++ PR tree-optimization/52633 ++ * gcc.target/arm/pr52633.c: New test. ++ ++ gcc/ ++ * tree-vect-patterns.c (vect_single_imm_use): New function. ++ (vect_recog_widen_mult_pattern): Use it instead of open-coding loop. 
++ (vect_recog_over_widening_pattern): Likewise. ++ (vect_recog_widen_shift_pattern): Likewise. ++ ++ gcc/ ++ * tree-vect-patterns.c (vect_same_loop_or_bb_p): New function. ++ (vect_handle_widen_op_by_const): Use it instead of open-coding test. ++ (vect_recog_widen_mult_pattern): Likewise. ++ (vect_operation_fits_smaller_type): Likewise. ++ (vect_recog_over_widening_pattern): Likewise. ++ (vect_recog_widen_shift_pattern): Add to vect_same_loop_or_bb_p test. ++ ++2012-04-16 Ulrich Weigand ++ ++ LP 972648 ++ ++ Backport from mainline: ++ ++ 2011-08-23 Richard Guenther ++ ++ gcc/ ++ * Makefile.in (tree-data-ref.o): Add tree-affine.h dependency. ++ * tree-affine.h (aff_comb_cannot_overlap_p): Declare. ++ * tree-affine.c (aff_comb_cannot_overlap_p): New function, moved ++ from ... ++ * tree-ssa-loop-im.c (cannot_overlap_p): ... here. ++ (mem_refs_may_alias_p): Adjust. ++ * tree-data-ref.h (dr_may_alias_p): Adjust. ++ * tree-data-ref.c: Include tree-affine.h. ++ (dr_analyze_indices): Do nothing for the non-loop case. ++ (dr_may_alias_p): Distinguish loop and non-loop case. Disambiguate ++ more cases in the non-loop case. ++ * graphite-sese-to-poly.c (write_alias_graph_to_ascii_dimacs): Adjust ++ calls to dr_may_alias_p. ++ (write_alias_graph_to_ascii_ecc): Likewise. ++ (write_alias_graph_to_ascii_dot): Likewise. ++ (build_alias_set_optimal_p): Likewise. ++ ++2012-04-13 Ulrich Weigand ++ ++ LP 968766 ++ ++ Backport from mainline: ++ ++ gcc/ ++ PR tree-optimization/52870 ++ * tree-vect-patterns.c (vect_recog_widen_mult_pattern): Verify that ++ presumed pattern statement is within the same loop or basic block. ++ ++ gcc/testsuite/ ++ PR tree-optimization/52870 ++ * gcc.dg/vect/pr52870.c: New test. ++ ++2012-04-10 Andrew Stubbs ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-04-10 Andrew Stubbs ++ ++ GCC Linaro 4.6-2012.04 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. 
++ ++2012-04-02 Andrew Stubbs ++ ++ Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 186060). ++ ++2012-03-26 Ulrich Weigand ++ ++ LP 960283 ++ LP 960274 ++ LP 960817 ++ ++ Backport from mainline: ++ ++ gcc/ ++ PR tree-optimization/52686 ++ * tree-vect-data-refs.c (vect_get_smallest_scalar_type): Handle ++ WIDEN_LSHIFT_EXPR. ++ ++ gcc/testsuite/ ++ PR tree-optimization/52686 ++ * gcc.target/arm/pr52686.c: New test. ++ ++2012-03-12 Andrew Stubbs ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-03-12 Andrew Stubbs ++ ++ GCC Linaro 4.6-2012.03 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-03-08 Ramana Radhakrishnan ++ ++ Backport from mainline. ++ 2012-02-28 Richard Earnshaw ++ ++ * arm.c (aapcs_vfp_is_call_or_return_candidate): Only use the machine ++ mode if there is no type information available. ++ ++ 2012-02-28 Ramana Radhakrishnan ++ ++ * gcc.target/arm/aapcs/vfp1.c (dg_do run): Run on all eabi variants. ++ * gcc.target/arm/aapcs/vfp2.c: Likewise. ++ * gcc.target/arm/aapcs/vfp3.c: Likewise. ++ * gcc.target/arm/aapcs/vfp4.c: Likewise. ++ * gcc.target/arm/aapcs/vfp5.c: Likewise. ++ * gcc.target/arm/aapcs/vfp6.c: Likewise. ++ * gcc.target/arm/aapcs/vfp7.c: Likewise. ++ * gcc.target/arm/aapcs/vfp8.c: Likewise. ++ * gcc.target/arm/aapcs/vfp9.c: Likewise. ++ * gcc.target/arm/aapcs/vfp10.c: Likewise. ++ * gcc.target/arm/aapcs/vfp11.c: Likewise. ++ * gcc.target/arm/aapcs/vfp12.c: Likewise. ++ * gcc.target/arm/aapcs/vfp13.c: Likewise. ++ * gcc.target/arm/aapcs/vfp14.c: Likewise. ++ * gcc.target/arm/aapcs/vfp15.c: Likewise. ++ * gcc.target/arm/aapcs/vfp16.c: Likewise. ++ * gcc.target/arm/aapcs/vfp17.c: Likewise. ++ * gcc.target/arm/neon-constants.h: New file. ++ * gcc.target/arm/aapcs/neon-constants.h: New file. ++ * gcc.target/arm/aapcs/neon-vect1.c: New test. ++ * gcc.target/arm/aapcs/neon-vect2.c: New test. ++ * gcc.target/arm/aapcs/neon-vect3.c: New test. ++ * gcc.target/arm/aapcs/neon-vect4.c: New test. 
++ * gcc.target/arm/aapcs/neon-vect5.c: New test. ++ * gcc.target/arm/aapcs/neon-vect6.c: New test. ++ * gcc.target/arm/aapcs/neon-vect7.c: New test. ++ * gcc.target/arm/aapcs/neon-vect8.c: New test. ++ ++2012-03-08 Michael Hope ++ ++ Backport proposed patch: ++ ++ gcc/ ++ 2012-01-31 Richard Henderson ++ ++ * longlong.h [arm] (umul_ppmm): Use umull. Enable for thumb2. ++ [arm] (count_trailing_zeros): Use __builtin_ctz. ++ ++2012-03-06 Ulrich Weigand ++ ++ Backport from mainline: ++ ++ gcc/ ++ * config/arm/arm.c (arm_sat_operator_match): New function. ++ * config/arm/arm-protos.h (arm_sat_operator_match): Add prototype. ++ * config/arm/arm.md ("insn" attribute): Add "sat" value. ++ ("SAT", "SATrev"): New code iterators. ++ ("SATlo", "SAThi"): New code iterator attributes. ++ ("*satsi_"): New pattern. ++ ("*satsi__shift"): Likewise. ++ * config/arm/predicates.md (sat_shift_operator): New. ++ ++ gcc/testsuite/ ++ * gcc.target/arm/sat-1.c: New test. ++ ++2012-03-06 Ramana Radhakrishnan ++ ++ LP:942307 ++ gcc/ ++ PR target/50305 ++ * config/arm/arm.c (arm_legitimize_reload_address): Recognize ++ output of a previous pass through legitimize_reload_address. ++ Do not attempt to optimize addresses if the base register is ++ equivalent to a constant. ++ gcc/testsuite/ ++ PR target/50305 ++ * gcc.target/arm/pr50305.c: New test. ++ ++2012-03-02 Andrew Stubbs ++ ++ Merge from FSF GCC 4.6.3 Release (svn branches/gcc-4_6-branch 108680). ++ ++2012-02-24 Ramana Radhakrishnan ++ ++ Backport from mainline. ++ gcc/ ++ 2012-02-21 Matthew Gretton-Dann ++ ++ Revert r183011 ++ * config/arm/arm-cores.def (cortex-a15): Use generic Cortex tuning ++ parameters. ++ * config/arm/arm.c (arm_cortex_a15_tune): Remove. ++ ++ ++2012-02-24 Ramana Radhakrishnan ++ ++ LP:#922474 ++ gcc/ ++ * config/arm/sync.md (sync_lock_releasedi): Define. ++ (arm_sync_lock_releasedi): Likewise. ++ gcc/testsuite ++ Backport from mainline. 
++ 2012-01-30 Greta Yorsh ++ * gcc.target/arm/di-longlong64-sync-withldrexd.c: Accept ++ new code generated for __sync_lock_release. ++ ++2012-02-24 Ramana Radhakrishnan ++ ++ 2011-12-05 Ramana Radhakrishnan ++ ++ gcc/ ++ * config/arm/arm.c (vfp3_const_double_for_fract_bits): Define. ++ * config/arm/arm-protos.h (vfp3_const_double_for_fract_bits): Declare. ++ * config/arm/constraints.md ("Dt"): New constraint. ++ * config/arm/predicates.md (const_double_vcvt_power_of_two_reciprocal): ++ New. ++ * config/arm/vfp.md (*arm_combine_vcvt_f32_s32): New. ++ (*arm_combine_vcvt_f32_u32): New. ++ ++ LP:#900426 ++ ++ 2011-12-06 Ramana Radhakrishnan ++ * config/arm/vfp.md (*combine_vcvt_f64_): Fix ++ formatting character for vmov.f64 case. ++ ++2012-02-24 Ramana Radhakrishnan ++ ++ gcc/ ++ * config/arm/arm.c (arm_print_operand): Remove wrongly merged code. ++ (vfp3_const_double_for_fract_bits): Likewise. ++ ++2012-02-20 Andrew Stubbs ++ ++ LP:#936863 ++ gcc/ ++ * config/arm/arm.c (arm_print_operand): Avoid null-pointer ++ dereference from MEM_SIZE. ++ ++2012-02-08 Ulrich Weigand ++ ++ gcc/ ++ * config/arm/arm.c (arm_option_optimization_table): Enable ++ -fsched-pressure using -fsched-pressure-algorithm=model by ++ default when optimizing. ++ ++2012-02-08 Richard Sandiford ++ ++ gcc/ ++ * sched-deps.c (fixup_sched_groups): Rename to... ++ (chain_to_prev_insn): ...this. ++ (chain_to_prev_insn_p): New function. ++ (deps_analyze_insn): Use it instead of SCHED_GROUP_P. ++ ++2012-02-08 Richard Sandiford ++ ++ gcc/ ++ * sched-int.h (_haifa_insn_data): Move priority_status. ++ Add model_index. ++ (INSN_MODEL_INDEX): New macro. ++ * haifa-sched.c (insn_delay): New function. ++ (sched_regno_pressure_class): Update commentary. ++ (mark_regno_birth_or_death): Pass the liveness bitmap and ++ pressure array as arguments, instead of using curr_reg_live and ++ curr_reg_pressure. Only update the pressure if the bit in the ++ liveness set has changed. 
++ (initiate_reg_pressure_info): Always trust the live-in set for ++ SCHED_PRESSURE_MODEL. ++ (initiate_bb_reg_pressure_info): Update call to ++ mark_regno_birth_or_death. ++ (dep_list_size): Take the list as argument. ++ (calculate_reg_deaths): New function, extracted from... ++ (setup_insn_reg_pressure_info): ...here. ++ (MODEL_BAR): New macro. ++ (model_pressure_data, model_insn_info, model_pressure_limit) ++ (model_pressure_group): New structures. ++ (model_schedule, model_worklist, model_insns, model_num_insns) ++ (model_curr_point, model_before_pressure, model_next_priority): ++ New variables. ++ (MODEL_PRESSURE_DATA, MODEL_MAX_PRESSURE, MODEL_REF_PRESSURE) ++ (MODEL_INSN_INFO, MODEL_INSN): New macros. ++ (model_index, model_update_limit_points_in_group): New functions. ++ (model_update_limit_points, model_last_use_except): Likewise. ++ (model_start_update_pressure, model_update_pressure): Likewise. ++ (model_recompute, model_spill_cost, model_excess_group_cost): Likewise. ++ (model_excess_cost, model_dump_pressure_points): Likewise. ++ (model_set_excess_costs): Likewise. ++ (rank_for_schedule): Extend SCHED_PRIORITY_WEIGHTED ordering to ++ SCHED_PRIORITY_MODEL. Use insn_delay. Use the order in the model ++ schedule as an alternative tie-breaker. Update the call to ++ dep_list_size. ++ (ready_sort): Call model_set_excess_costs. ++ (update_register_pressure): Update call to mark_regno_birth_or_death. ++ Rely on that function to check liveness rather than doing it here. ++ (model_classify_pressure, model_order_p, model_add_to_worklist_at) ++ (model_remove_from_worklist, model_add_to_worklist, model_promote_insn) ++ (model_add_to_schedule, model_analyze_insns, model_init_pressure_group) ++ (model_record_pressure, model_record_pressures): New functions. ++ (model_record_final_pressures, model_add_successors_to_worklist) ++ (model_promote_predecessors, model_choose_insn): Likewise. ++ (model_reset_queue_indices, model_dump_pressure_summary): Likewise. 
++ (model_start_schedule, model_finalize_pressure_group): Likewise. ++ (model_end_schedule): Likewise. ++ (schedule_insn): Say when we're scheduling the next instruction ++ in the model schedule. ++ (schedule_insn): Handle SCHED_PRESSURE_MODEL. ++ (queue_to_ready): Do not add instructions that are ++ MAX_SCHED_READY_INSNS beyond the current point of the model schedule. ++ Always allow the next instruction in the model schedule to be added. ++ (debug_ready_list): Print the INSN_REG_PRESSURE_EXCESS_COST_CHANGE ++ and delay for SCHED_PRESSURE_MODEL too. ++ (prune_ready_list): Extend SCHED_PRIORITY_WEIGHTED handling to ++ SCHED_PRIORITY_MODEL, but also take the DFA into account. ++ (schedule_block): Call model_start_schedule and model_end_schedule. ++ Extend SCHED_PRIORITY_WEIGHTED stall handling to SCHED_PRIORITY_MODEL. ++ (sched_init): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling ++ to SCHED_PRESSURE_MODEL, but don't allocate saved_reg_live or ++ region_ref_regs. ++ (sched_finish): Update accordingly. ++ (fix_tick_ready): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling ++ to SCHED_PRESSURE_MODEL. ++ (add_jump_dependencies): Update call to dep_list_size. ++ (haifa_finish_h_i_d): Fix leak of max_reg_pressure. ++ (haifa_init_insn): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling ++ to SCHED_PRESSURE_MODEL. ++ * sched-deps.c (init_insn_reg_pressure_info): Likewise, but don't ++ allocate INSN_MAX_REG_PRESSURE for SCHED_PRESSURE_MODEL. ++ (sched_analyze_insn): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE ++ handling to SCHED_PRESSURE_MODEL. ++ ++2012-02-08 Richard Sandiford ++ ++ gcc/ ++ * common.opt (fsched-pressure-algorithm=): New option. ++ * flag-types.h (sched_pressure_algorithm): New enum. ++ * sched-int.h (sched_pressure_p): Replace with... ++ (sched_pressure): ...this new variable. ++ * haifa-sched.c (sched_pressure_p): Replace with... ++ (sched_pressure): ...this new variable. 
++ (sched_regno_pressure_class, rank_for_schedule, ready_sort) ++ (update_reg_and_insn_max_reg_pressure, schedule_insn) ++ (debug_ready_list, schedule_block, sched_init, sched_finish) ++ (fix_tick_ready, haifa_init_insn): Update accordingly. ++ * sched-deps.c (init_insn_reg_pressure_info): Likewise. ++ * sched-rgn.c (schedule_region): Likewise. ++ ++2012-02-08 Richard Sandiford ++ ++ gcc/ ++ Backport from mainline: ++ ++ 2011-04-01 Bernd Schmidt ++ ++ * haifa-sched.c (prune_ready_list): New function, broken out of ++ schedule_block. ++ (schedule_block): Use it. ++ ++2012-02-07 Andrew Stubbs ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-02-07 Andrew Stubbs ++ ++ GCC Linaro 4.6-2012.02 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-02-01 Andrew Stubbs ++ ++ Merge from FSF GCC 4.6.2 (svn branches/gcc-4_6-branch 183786). ++ ++2012-01-20 Ramana Radhakrishnan ++ ++ Backport from mainline ++ 2012-01-20 Ramana Radhakrishnan ++ ++ PR target/51819 ++ * config/arm/arm.c (arm_print_operand): Correct output of alignment ++ hints for neon loads and stores. ++ ++2012-01-16 Michael Hope ++ ++ Backport from mainline r181210: ++ ++ gcc/ ++ 2011-11-07 Matthew Gretton-Dann ++ ++ * config/arm/arm-cores.def: Add -mcpu=cortex-a7. ++ * config/arm/arm-tables.opt: Regenerate. ++ * config/arm/arm-tune.md: Likewise. ++ * config/arm/bpabi.h (BE8_LINK_SPEC): Add Cortex A-7. ++ * doc/invoke.texi: Document -mcpu=cortex-a7. ++ ++2012-01-16 Michael Hope ++ ++ Backport from mainline r182561: ++ ++ 2011-12-20 Richard Henderson ++ ++ gcc/ ++ * config/arm/arm.md (*arm_cmpdi_unsigned): Enable for thumb2. ++ * config/arm/arm.c (arm_select_cc_mode): Use it. ++ ++2012-01-16 Michael Hope ++ ++ Backport from mainline r183011: ++ ++ 2012-01-09 Matthew Gretton-Dann ++ ++ * config/arm/arm-cores.def (cortex-a15): Use cortex_a15_tune for ++ tuning parameters. ++ * config/arm/arm.c (arm_cortex_a15_tune): New static variable. 
++ ++2012-01-18 Michael Hope ++ ++ Backport from mainline r183126: ++ ++ 2012-01-12 Ira Rosen ++ ++ gcc/ ++ PR tree-optimization/51799 ++ * tree-vect-patterns.c (vect_recog_over_widening_pattern): Check ++ that the last operation is a type demotion. ++ ++ gcc/testsuite/ ++ * gcc.dg/vect/pr51799.c: New test. ++ * gcc.dg/vect/vect-widen-shift-u8.c: Expect two widening shift ++ patterns. ++ ++2012-01-12 Ulrich Weigand ++ ++ LP 879725 ++ Backport from mainline: ++ ++ 2012-01-02 Revital Eres ++ ++ gcc/ ++ * ddg.c (def_has_ccmode_p): New function. ++ (add_cross_iteration_register_deps, ++ create_ddg_dep_from_intra_loop_link): Call it. ++ ++ gcc/testsuite/ ++ * gcc.dg/sms-11.c: New file. ++ ++2012-01-11 Andrew Stubbs ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. ++ ++2012-01-11 Andrew Stubbs ++ ++ GCC Linaro 4.6-2012.01 released. ++ ++ gcc/ ++ * LINARO-VERSION: Update. ++ ++2012-01-06 Andrew Stubbs ++ ++ Backport from mainline: ++ ++ 2012-01-06 Andrew Stubbs ++ ++ gcc/testsuite/ ++ * gcc.target/arm/headmerge-2.c: Adjust scan pattern. ++ ++2012-01-05 Andrew Stubbs ++ ++ Merge from FSF GCC 4.6.2 (svn branches/gcc-4_6-branch 182894). ++ ++2012-01-05 Michael Hope ++ ++ Backport from mainline r182271: ++ ++ 2011-12-13 Revital Eres ++ ++ gcc/ ++ * modulo-sched.c (mark_loop_unsched): Free bbs. ++ ++2011-12-30 Richard Sandiford ++ ++ gcc/ ++ Backport from mainline: ++ ++ 2011-10-12 Richard Sandiford ++ ++ * expr.h (copy_blkmode_to_reg): Declare. ++ * expr.c (copy_blkmode_to_reg): New function. ++ (expand_assignment): Don't expand register RESULT_DECLs before ++ the lhs. Use copy_blkmode_to_reg to copy BLKmode values into a ++ RESULT_DECL register. ++ (expand_expr_real_1): Handle BLKmode decls when looking for promotion. ++ * stmt.c (expand_return): Move BLKmode-to-register code into ++ copy_blkmode_to_reg. 
++ ++2011-12-20 Ira Rosen ++ ++ Backport from mainline: ++ ++ 2011-11-29 Ira Rosen ++ ++ PR tree-optimization/51301 ++ gcc/ ++ * tree-vect-patterns.c (vect_recog_over_widening_pattern): Check that ++ the last statement doesn't convert to a bigger type than the original ++ type of the computation. ++ ++ gcc/testsuite/ ++ * gcc.dg/vect/pr51301.c: New test. ++ ++2011-12-06 Andrew Stubbs ++ ++ gcc/ ++ * LINARO-VERSION: Bump version. + - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/ChangeLog.linaro -+++ b/src/ChangeLog.linaro -@@ -0,0 +1,3067 @@ +2011-12-06 Andrew Stubbs + + GCC Linaro 4.6-2011.12 released. @@ -3086,785 +3959,69 @@ + * LINARO-VERSION: New file. + +Imported GCC from FSF trunk SVN revision 170067. ---- a/src/boehm-gc/ChangeLog -+++ b/src/boehm-gc/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ - 2011-10-26 Release Manager +--- a/src/gcc/builtins.c ++++ b/src/gcc/builtins.c +@@ -264,7 +264,14 @@ + } - * GCC 4.6.2 released. ---- a/src/boehm-gc/configure -+++ b/src/boehm-gc/configure -@@ -9604,7 +9604,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -10520,7 +10520,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -10538,7 +10538,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) -@@ -12428,7 +12428,7 @@ - esac - ;; - -- freebsd[12]*) -+ freebsd2.*) - # C++ shared libraries reported to be fairly broken before - # switch to ELF - ld_shlibs_CXX=no -@@ -14203,7 +14203,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -14221,7 +14221,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/configure -+++ b/src/configure -@@ -6944,10 +6944,6 @@ - extra_arflags_for_target=" -X32_64" - extra_nmflags_for_target=" -B -X32_64" - ;; -- *-*-darwin[3-9]*) -- # ranlib before Darwin10 requires the -c flag to look at common symbols. -- extra_ranlibflags_for_target=" -c" -- ;; - mips*-*-pe | sh*-*-pe | *arm-wince-pe) - target_makefile_frag="config/mt-wince" - ;; ---- a/src/configure.ac -+++ b/src/configure.ac -@@ -2428,10 +2428,6 @@ - extra_arflags_for_target=" -X32_64" - extra_nmflags_for_target=" -B -X32_64" - ;; -- *-*-darwin[[3-9]]*) -- # ranlib before Darwin10 requires the -c flag to look at common symbols. 
-- extra_ranlibflags_for_target=" -c" -- ;; - mips*-*-pe | sh*-*-pe | *arm-wince-pe) - target_makefile_frag="config/mt-wince" - ;; ---- a/src/gcc/ChangeLog -+++ b/src/gcc/ChangeLog -@@ -1,3 +1,161 @@ -+2011-11-25 Richard Sandiford -+ -+ Backport from mainline: -+ -+ 2011-03-29 Richard Sandiford -+ -+ PR debug/48190 -+ * dwarf2out.c (dw_loc_list_node): Add resolved_addr and replaced. -+ (cached_dw_loc_list_def): New structure. -+ (cached_dw_loc_list): New typedef. -+ (cached_dw_loc_list_table): New variable. -+ (cached_dw_loc_list_table_hash): New function. -+ (cached_dw_loc_list_table_eq): Likewise. -+ (add_location_or_const_value_attribute): Take a bool cache_p. -+ Cache the list when the parameter is true. -+ (gen_formal_parameter_die): Update caller. -+ (gen_variable_die): Likewise. -+ (dwarf2out_finish): Likewise. -+ (dwarf2out_abstract_function): Nullify cached_dw_loc_list_table -+ while generating debug info for the decl. -+ (dwarf2out_function_decl): Clear cached_dw_loc_list_table. -+ (dwarf2out_init): Initialize cached_dw_loc_list_table. -+ (resolve_addr): Cache the result of resolving a chain of -+ location lists. -+ -+2011-11-24 Enkovich Ilya -+ -+ PR target/51287 -+ * i386.c (distance_non_agu_define): Fix insn attr check. -+ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ -+2011-11-19 Eric Botcazou -+ -+ PR rtl-optimization/51187 -+ * reorg.c (relax_delay_slots): Do not consider a jump useless if there -+ is a barrier between the jump and its target label. -+ -+2011-11-19 Richard Earnshaw -+ -+ PR target/50493 -+ * config/arm/arm.c (neon_disambiguate_copy): Correctly handle partial -+ overlap of src and dest operands. -+ -+2011-11-18 Iain Sandoe -+ -+ PR target/49992 -+ * configure.ac: Remove ranlib special-casing for Darwin. -+ * configure: Regenerate. 
-+ -+2011-11-16 Richard Earnshaw -+ Bernd Schmidt -+ Sebastian Huber -+ -+ PR target/49641 -+ * config/arm/arm.c (store_multiple_sequence): Avoid cases where -+ the base reg is stored iff compiling for Thumb1. -+ -+2011-11-13 Iain Sandoe -+ -+ PR target/48108 -+ Backport from mainline r180523 -+ * config/darwin.c (top level): Amend comments concerning LTO output. -+ (lto_section_num): New variable. (darwin_lto_section_e): New GTY. -+ (LTO_SECTS_SECTION, LTO_INDEX_SECTION): New. -+ (LTO_NAMES_SECTION): Rename. -+ (darwin_asm_named_section): Record LTO section counts and switches -+ in a vec of darwin_lto_section_e. -+ (darwin_file_start): Remove unused code. -+ (darwin_file_end): Put an LTO section termination label. Handle -+ output of the wrapped LTO sections, index and names table. -+ -+2011-11-12 Iain Sandoe -+ -+ PR target/45233 -+ * config/rs6000/rs6000.c (rs6000_legitimize_reload_address): -+ Only expand a symbol ref. into an access when the entity is defined -+ in the TU. -+ -+2011-11-10 Jakub Jelinek -+ -+ PR middle-end/51077 -+ * tree-object-size.c (addr_object_size): Check TREE_CODE of -+ MEM_REF's operand rather than code of the MEM_REF itself. -+ -+2011-11-07 Alan Modra -+ -+ PR target/30282 -+ * config/rs6000/rs6000.c (rs6000_emit_stack_reset): Always emit -+ blockage for ABI_V4. -+ -+2011-11-04 Eric Botcazou -+ -+ PR c++/50608 -+ * c-parser.c (c_parser_postfix_expression) : Adjust call -+ to fold_offsetof. -+ * c-typeck.c (build_unary_op) : Call fold_offsetof_1. -+ -+2011-11-04 Eric Botcazou -+ -+ PR target/50979 -+ * config/sparc/sparc.h (ASM_CPU_SPEC): Pass -Av8 if -mcpu=v8. -+ -+2011-11-03 Uros Bizjak -+ -+ * config/i386/i386.md (lround2, -+ rint2, floor2, lfloor2, -+ btrunc2, lwp_lwpval3): Use operands[N] instead of operandN. -+ -+2011-11-02 Eric Botcazou -+ -+ PR target/50945 -+ * config/sparc/sparc.md (movsf_insn): Reindent constraints. -+ (movsf_insn_no_fpu): Likewise. -+ (movdf_insn_sp32): Likewise. -+ (movdf_insn_sp32_no_fpu): Likewise. 
-+ (movdf_insn_sp32_v9): Likewise. Remove redundant GY constraint. -+ (movdf_insn_sp32_v9_no_fpu): Likewise. -+ (movdf_insn_sp64): Likewise. -+ (movdf_insn_sp64_no_fpu): Likewise. -+ (movtf_insn_sp32): Likewise. -+ (movtf_insn_sp32_no_fpu): Likewise. -+ (movtf_insn_sp64): Likewise. -+ (movtf_insn_sp64_hq): Likewise. -+ (movtf_insn_sp64_no_fpu): Likewise. -+ -+2011-11-02 Bernd Schmidt -+ -+ * cfgcleanup.c (try_head_merge_bb): If get_condition returns -+ NULL for a jump that is a cc0 insn, pick the previous insn for -+ move_before. -+ -+2011-11-01 Uros Bizjak -+ -+ * config/i386/i386.md (splitters for int-float conversion): Use -+ SUBREG_REG on SUBREGs in splitter constraints. -+ -+2011-11-01 Julian Brown -+ -+ PR rtl-optimization/47918 -+ * reload1.c (set_initial_label_offsets): Use initial offsets -+ for labels on the nonlocal_goto_handler_labels chain. -+ -+2011-10-29 John David Anglin -+ -+ PR target/50691 -+ * config/pa/pa.c (emit_move_sequence): Legitimize TLS symbol references. -+ * config/pa/pa.h (LEGITIMATE_CONSTANT_P): Return false for -+ TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC symbol references. -+ -+2011-10-27 Uros Bizjak -+ -+ PR target/50875 -+ * config/i386/sse.md (*avx_unpcklpd256): Remove extra insn -+ constraints. Change alternative 1 to "x,m,1". + /* Return the alignment in bits of EXP, an object. +- Don't return more than MAX_ALIGN no matter what. */ ++ Don't return more than MAX_ALIGN no matter what. + - 2011-10-26 Release Manager ++ Note that the address (and thus the alignment) computed here is based ++ on the address to which a symbol resolves, whereas DECL_ALIGN is based ++ on the address at which an object is actually located. These two ++ addresses are not always the same. For example, on ARM targets, ++ the address &foo of a Thumb function foo() has the lowest bit set, ++ whereas foo() itself starts on an even address. */ - * GCC 4.6.2 released. 
-@@ -144,8 +307,8 @@ + unsigned int + get_object_alignment (tree exp, unsigned int max_align) +@@ -286,7 +293,21 @@ + exp = DECL_INITIAL (exp); + if (DECL_P (exp) + && TREE_CODE (exp) != LABEL_DECL) +- align = DECL_ALIGN (exp); ++ { ++ if (TREE_CODE (exp) == FUNCTION_DECL) ++ { ++ /* Function addresses can encode extra information besides their ++ alignment. However, if TARGET_PTRMEMFUNC_VBIT_LOCATION ++ allows the low bit to be used as a virtual bit, we know ++ that the address itself must be 2-byte aligned. */ ++ if (TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) ++ align = 2 * BITS_PER_UNIT; ++ else ++ align = BITS_PER_UNIT; ++ } ++ else ++ align = DECL_ALIGN (exp); ++ } + else if (CONSTANT_CLASS_P (exp)) + { + align = TYPE_ALIGN (TREE_TYPE (exp)); +--- a/src/gcc/calls.c ++++ b/src/gcc/calls.c +@@ -686,7 +686,7 @@ + /* If the value is a non-legitimate constant, force it into a + pseudo now. TLS symbols sometimes need a call to resolve. */ + if (CONSTANT_P (args[i].value) +- && !LEGITIMATE_CONSTANT_P (args[i].value)) ++ && !targetm.legitimate_constant_p (args[i].mode, args[i].value)) + args[i].value = force_reg (args[i].mode, args[i].value); - 2011-10-07 Bernd Schmidt + /* If we are to promote the function arg to a wider mode, +@@ -3578,7 +3578,8 @@ -- PR target/49049 -- * config/arm/arm.md (arm_subsi3_insn): Lose the last alternative. -+ PR target/49049 -+ * config/arm/arm.md (arm_subsi3_insn): Lose the last alternative. + /* Make sure it is a reasonable operand for a move or push insn. */ + if (!REG_P (addr) && !MEM_P (addr) +- && ! 
(CONSTANT_P (addr) && LEGITIMATE_CONSTANT_P (addr))) ++ && !(CONSTANT_P (addr) ++ && targetm.legitimate_constant_p (Pmode, addr))) + addr = force_operand (addr, NULL_RTX); - 2011-10-06 Jakub Jelinek - ---- a/src/gcc/DATESTAMP -+++ b/src/gcc/DATESTAMP -@@ -1 +1 @@ --20111026 -+20111201 ---- a/src/gcc/LINARO-VERSION -+++ b/src/gcc/LINARO-VERSION -@@ -0,0 +1 @@ -+4.6-2011.12 ---- a/src/gcc/Makefile.in -+++ b/src/gcc/Makefile.in -@@ -888,6 +888,8 @@ - READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h - PARAMS_H = params.h params.def - BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def -+INTERNAL_FN_DEF = internal-fn.def -+INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) - TREE_H = tree.h all-tree.def tree.def c-family/c-common.def \ - $(lang_tree_files) $(MACHMODE_H) tree-check.h $(BUILTINS_DEF) \ - $(INPUT_H) statistics.h $(VEC_H) treestruct.def $(HASHTAB_H) \ -@@ -897,7 +899,7 @@ - BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) cfghooks.h - GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \ - $(GGC_H) $(BASIC_BLOCK_H) $(TARGET_H) tree-ssa-operands.h \ -- tree-ssa-alias.h vecir.h -+ tree-ssa-alias.h vecir.h $(INTERNAL_FN_H) - GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h - COVERAGE_H = coverage.h $(GCOV_IO_H) - DEMANGLE_H = $(srcdir)/../include/demangle.h -@@ -1269,6 +1271,7 @@ - init-regs.o \ - input.o \ - integrate.o \ -+ internal-fn.o \ - intl.o \ - ira.o \ - ira-build.o \ -@@ -2422,7 +2425,8 @@ - tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ - $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) langhooks.h $(FLAGS_H) \ -- $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h -+ $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h $(CFGLOOP_H) \ -+ $(TREE_DATA_REF_H) - tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - $(TM_H) $(TREE_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(FLAGS_H) \ - $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TIMEVAR_H) 
$(TREE_DUMP_H) $(TREE_PASS_H) \ -@@ -2750,6 +2754,8 @@ - $(TM_H) $(TREE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \ - $(TREE_PASS_H) tree-ssa-propagate.h tree-pretty-print.h \ - gimple-pretty-print.h -+internal-fn.o : internal-fn.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ -+ $(GIMPLE_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H) - gimple.o : gimple.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TREE_H) \ - $(GGC_H) $(GIMPLE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) gt-gimple.h \ - $(TREE_FLOW_H) value-prof.h $(FLAGS_H) $(DEMANGLE_H) \ ---- a/src/gcc/ada/ChangeLog -+++ b/src/gcc/ada/ChangeLog -@@ -1,3 +1,33 @@ -+2011-11-13 Iain Sandoe -+ -+ Backport from mainline r181474 -+ PR target/50678 -+ * init.c (__gnat_error_handler) [Darwin]: Move work-around to the -+ bug filed as radar #10302855 from __gnat_error_handler ... -+ ... to (__gnat_adjust_context_for_raise) [Darwin]: New. -+ (HAVE_GNAT_ADJUST_CONTEXT_FOR_RAISE) [Darwin]: Define. -+ (__gnat_error_handler) [Darwin]: Use __gnat_adjust_context_for_raise. -+ -+2011-11-18 Tristan Gingold -+ Iain Sandoe -+ -+ PR target/49992 -+ * mlib-tgt-specific-darwin.adb (Archive_Indexer_Options): Remove. -+ * gcc-interface/Makefile.in (darwin): Remove ranlib special-casing -+ for Darwin. -+ -+2011-11-13 Iain Sandoe -+ -+ Backport from mainline r181319 -+ * gcc-interface/Makefile.in (stamp-gnatlib-$(RTSDIR)): Don't link -+ s-oscons.ads. -+ (OSCONS_CPP, OSCONS_EXTRACT): New. -+ (./bldtools/oscons/xoscons): New Target. -+ ($(RTSDIR)/s-oscons.ads): New Target. -+ (gnatlib): Depend on $(RTSDIR)/s-oscons.ads. -+ * Make-generated.in: Remove machinery to generate xoscons and -+ ada/s-oscons.ads. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/gcc/ada/Make-generated.in -+++ b/src/gcc/ada/Make-generated.in -@@ -64,37 +64,6 @@ - $(CP) $^ $(ADA_GEN_SUBDIR)/bldtools/nmake_s - (cd $(ADA_GEN_SUBDIR)/bldtools/nmake_s; gnatmake -q xnmake ; ./xnmake -s ../../nmake.ads ) - --ifeq ($(strip $(filter-out alpha64 ia64 dec hp vms% openvms% alphavms%,$(subst -, ,$(host)))),) --OSCONS_CPP=../../../$(DECC) -E /comment=as_is -DNATIVE \ -- -DTARGET='""$(target)""' s-oscons-tmplt.c -- --OSCONS_EXTRACT=../../../$(DECC) -DNATIVE \ -- -DTARGET='""$(target)""' s-oscons-tmplt.c ; \ -- ld -o s-oscons-tmplt.exe s-oscons-tmplt.obj; \ -- ./s-oscons-tmplt.exe > s-oscons-tmplt.s -- --else --# GCC_FOR_TARGET has paths relative to the gcc directory, so we need to ajust --# for running it from $(ADA_GEN_SUBDIR)/bldtools/oscons --OSCONS_CC=`echo "$(GCC_FOR_TARGET)" \ -- | sed -e 's^\./xgcc^../../../xgcc^' -e 's^-B./^-B../../../^'` --OSCONS_CPP=$(OSCONS_CC) $(GNATLIBCFLAGS) -E -C \ -- -DTARGET=\"$(target)\" s-oscons-tmplt.c > s-oscons-tmplt.i --OSCONS_EXTRACT=$(OSCONS_CC) -S s-oscons-tmplt.i --endif -- --$(ADA_GEN_SUBDIR)/s-oscons.ads : $(ADA_GEN_SUBDIR)/s-oscons-tmplt.c $(ADA_GEN_SUBDIR)/gsocket.h $(ADA_GEN_SUBDIR)/xoscons.adb $(ADA_GEN_SUBDIR)/xutil.ads $(ADA_GEN_SUBDIR)/xutil.adb -- -$(MKDIR) $(ADA_GEN_SUBDIR)/bldtools/oscons -- $(RM) $(addprefix $(ADA_GEN_SUBDIR)/bldtools/oscons/,$(notdir $^)) -- $(CP) $^ $(ADA_GEN_SUBDIR)/bldtools/oscons -- (cd $(ADA_GEN_SUBDIR)/bldtools/oscons ; gnatmake -q xoscons ; \ -- $(RM) s-oscons-tmplt.i s-oscons-tmplt.s ; \ -- $(OSCONS_CPP) ; \ -- $(OSCONS_EXTRACT) ; \ -- ./xoscons ; \ -- $(RM) ../../s-oscons.ads ; \ -- $(CP) s-oscons.ads s-oscons.h ../../) -- - $(ADA_GEN_SUBDIR)/sdefault.adb: $(ADA_GEN_SUBDIR)/stamp-sdefault ; @true - $(ADA_GEN_SUBDIR)/stamp-sdefault : $(srcdir)/version.c Makefile - $(ECHO) "pragma Style_Checks (Off);" >tmp-sdefault.adb ---- a/src/gcc/ada/gcc-interface/Makefile.in -+++ b/src/gcc/ada/gcc-interface/Makefile.in -@@ -2189,7 +2189,6 @@ - - EH_MECHANISM=-gcc - 
GNATLIB_SHARED = gnatlib-shared-darwin -- RANLIB = ranlib -c - GMEM_LIB = gmemlib - LIBRARY_VERSION := $(LIB_VERSION) - soext = .dylib -@@ -2447,21 +2446,52 @@ - $(foreach PAIR,$(LIBGNAT_TARGET_PAIRS), \ - $(LN_S) $(fsrcpfx)ada/$(word 2,$(subst <, ,$(PAIR))) \ - $(RTSDIR)/$(word 1,$(subst <, ,$(PAIR)));) --# Copy generated target dependent sources -- $(RM) $(RTSDIR)/s-oscons.ads -- (cd $(RTSDIR); $(LN_S) ../s-oscons.ads s-oscons.ads) -+# Copy tsystem.h -+ $(CP) $(srcdir)/tsystem.h $(RTSDIR) - $(RM) ../stamp-gnatlib-$(RTSDIR) - touch ../stamp-gnatlib1-$(RTSDIR) - - # GNULLI End ############################################################# - -+ifeq ($(strip $(filter-out alpha64 ia64 dec hp vms% openvms% alphavms%,$(subst -, ,$(host)))),) -+OSCONS_CPP=../../$(DECC) -E /comment=as_is -DNATIVE \ -+ -DTARGET='""$(target)""' $(fsrcpfx)ada/s-oscons-tmplt.c -+ -+OSCONS_EXTRACT=../../$(DECC) -DNATIVE \ -+ -DTARGET='""$(target)""' $(fsrcpfx)ada/s-oscons-tmplt.c ; \ -+ ld -o s-oscons-tmplt.exe s-oscons-tmplt.obj; \ -+ ./s-oscons-tmplt.exe > s-oscons-tmplt.s -+ -+else -+# GCC_FOR_TARGET has paths relative to the gcc directory, so we need to adjust -+# for running it from $(RTSDIR) -+OSCONS_CC=`echo "$(GCC_FOR_TARGET)" \ -+ | sed -e 's^\./xgcc^../../xgcc^' -e 's^-B./^-B../../^'` -+OSCONS_CPP=$(OSCONS_CC) $(GNATLIBCFLAGS) -E -C \ -+ -DTARGET=\"$(target)\" $(fsrcpfx)ada/s-oscons-tmplt.c > s-oscons-tmplt.i -+OSCONS_EXTRACT=$(OSCONS_CC) -S s-oscons-tmplt.i -+endif -+ -+./bldtools/oscons/xoscons: xoscons.adb xutil.ads xutil.adb -+ -$(MKDIR) ./bldtools/oscons -+ $(RM) $(addprefix ./bldtools/oscons/,$(notdir $^)) -+ $(CP) $^ ./bldtools/oscons -+ (cd ./bldtools/oscons ; gnatmake -q xoscons) -+ -+$(RTSDIR)/s-oscons.ads: ../stamp-gnatlib1-$(RTSDIR) s-oscons-tmplt.c gsocket.h ./bldtools/oscons/xoscons -+ $(RM) $(RTSDIR)/s-oscons-tmplt.i $(RTSDIR)/s-oscons-tmplt.s -+ (cd $(RTSDIR) ; \ -+ $(OSCONS_CPP) ; \ -+ $(OSCONS_EXTRACT) ; \ -+ ../bldtools/oscons/xoscons) -+ - # Don't use semicolon 
separated shell commands that involve list expansions. - # The semicolon triggers a call to DCL on VMS and DCL can't handle command - # line lengths in excess of 256 characters. - # Example: cd $(RTSDIR); ar rc libfoo.a $(LONG_LIST_OF_OBJS) - # is guaranteed to overflow the buffer. - --gnatlib: ../stamp-gnatlib1-$(RTSDIR) ../stamp-gnatlib2-$(RTSDIR) -+gnatlib: ../stamp-gnatlib1-$(RTSDIR) ../stamp-gnatlib2-$(RTSDIR) $(RTSDIR)/s-oscons.ads - $(MAKE) -C $(RTSDIR) \ - CC="`echo \"$(GCC_FOR_TARGET)\" \ - | sed -e 's,\./xgcc,../../xgcc,' -e 's,-B\./,-B../../,'`" \ ---- a/src/gcc/ada/init.c -+++ b/src/gcc/ada/init.c -@@ -2216,12 +2216,33 @@ - return 0; - } - -+#define HAVE_GNAT_ADJUST_CONTEXT_FOR_RAISE -+ -+void -+__gnat_adjust_context_for_raise (int signo ATTRIBUTE_UNUSED, -+ void *ucontext ATTRIBUTE_UNUSED) -+{ -+#if defined (__x86_64__) -+ /* Work around radar #10302855/pr50678, where the unwinders (libunwind or -+ libgcc_s depending on the system revision) and the DWARF unwind data for -+ the sigtramp have different ideas about register numbering (causing rbx -+ and rdx to be transposed).. 
*/ -+ ucontext_t *uc = (ucontext_t *)ucontext ; -+ unsigned long t = uc->uc_mcontext->__ss.__rbx; -+ -+ uc->uc_mcontext->__ss.__rbx = uc->uc_mcontext->__ss.__rdx; -+ uc->uc_mcontext->__ss.__rdx = t; -+#endif -+} -+ - static void --__gnat_error_handler (int sig, siginfo_t *si, void *ucontext ATTRIBUTE_UNUSED) -+__gnat_error_handler (int sig, siginfo_t *si, void *ucontext) - { - struct Exception_Data *exception; - const char *msg; - -+ __gnat_adjust_context_for_raise (sig, ucontext); -+ - switch (sig) - { - case SIGSEGV: ---- a/src/gcc/ada/mlib-tgt-specific-darwin.adb -+++ b/src/gcc/ada/mlib-tgt-specific-darwin.adb -@@ -36,8 +36,6 @@ - - -- Non default subprograms - -- function Archive_Indexer_Options return String_List_Access; -- - procedure Build_Dynamic_Library - (Ofiles : Argument_List; - Options : Argument_List; -@@ -67,15 +65,6 @@ - (1 => Flat_Namespace'Access, - 2 => Shared_Libgcc'Access); - -- ----------------------------- -- -- Archive_Indexer_Options -- -- ----------------------------- -- -- function Archive_Indexer_Options return String_List_Access is -- begin -- return new String_List'(1 => new String'("-c")); -- end Archive_Indexer_Options; -- - --------------------------- - -- Build_Dynamic_Library -- - --------------------------- -@@ -180,7 +169,6 @@ - end Is_Archive_Ext; - - begin -- Archive_Indexer_Options_Ptr := Archive_Indexer_Options'Access; - Build_Dynamic_Library_Ptr := Build_Dynamic_Library'Access; - DLL_Ext_Ptr := DLL_Ext'Access; - Dynamic_Option_Ptr := Dynamic_Option'Access; ---- a/src/gcc/builtins.c -+++ b/src/gcc/builtins.c -@@ -264,7 +264,14 @@ - } - - /* Return the alignment in bits of EXP, an object. -- Don't return more than MAX_ALIGN no matter what. */ -+ Don't return more than MAX_ALIGN no matter what. -+ -+ Note that the address (and thus the alignment) computed here is based -+ on the address to which a symbol resolves, whereas DECL_ALIGN is based -+ on the address at which an object is actually located. 
These two -+ addresses are not always the same. For example, on ARM targets, -+ the address &foo of a Thumb function foo() has the lowest bit set, -+ whereas foo() itself starts on an even address. */ - - unsigned int - get_object_alignment (tree exp, unsigned int max_align) -@@ -286,7 +293,21 @@ - exp = DECL_INITIAL (exp); - if (DECL_P (exp) - && TREE_CODE (exp) != LABEL_DECL) -- align = DECL_ALIGN (exp); -+ { -+ if (TREE_CODE (exp) == FUNCTION_DECL) -+ { -+ /* Function addresses can encode extra information besides their -+ alignment. However, if TARGET_PTRMEMFUNC_VBIT_LOCATION -+ allows the low bit to be used as a virtual bit, we know -+ that the address itself must be 2-byte aligned. */ -+ if (TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) -+ align = 2 * BITS_PER_UNIT; -+ else -+ align = BITS_PER_UNIT; -+ } -+ else -+ align = DECL_ALIGN (exp); -+ } - else if (CONSTANT_CLASS_P (exp)) - { - align = TYPE_ALIGN (TREE_TYPE (exp)); ---- a/src/gcc/c-family/ChangeLog -+++ b/src/gcc/c-family/ChangeLog -@@ -1,3 +1,15 @@ -+2011-11-04 Eric Botcazou -+ -+ PR c++/50608 -+ * c-common.c (c_fully_fold_internal) : Call fold_offsetof_1. -+ (fold_offsetof_1): Make global. Remove STOP_REF argument and adjust. -+ : Return the argument. -+ : Remove special code for negative offset. -+ Call fold_build_pointer_plus instead of size_binop. -+ (fold_offsetof): Remove STOP_REF argument and adjust. -+ * c-common.h (fold_offsetof_1): Declare. -+ (fold_offsetof): Remove STOP_REF argument. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/gcc/c-family/c-common.c -+++ b/src/gcc/c-family/c-common.c -@@ -1236,13 +1236,7 @@ - && (op1 = get_base_address (op0)) != NULL_TREE - && TREE_CODE (op1) == INDIRECT_REF - && TREE_CONSTANT (TREE_OPERAND (op1, 0))) -- { -- tree offset = fold_offsetof (op0, op1); -- op1 -- = fold_convert_loc (loc, TREE_TYPE (expr), TREE_OPERAND (op1, 0)); -- ret = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (expr), op1, -- offset); -- } -+ ret = fold_convert_loc (loc, TREE_TYPE (expr), fold_offsetof_1 (op0)); - else if (op0 != orig_op0 || in_init) - ret = in_init - ? fold_build1_initializer_loc (loc, code, TREE_TYPE (expr), op0) -@@ -8459,20 +8453,15 @@ - return uc; - } - --/* Build the result of __builtin_offsetof. EXPR is a nested sequence of -- component references, with STOP_REF, or alternatively an INDIRECT_REF of -- NULL, at the bottom; much like the traditional rendering of offsetof as a -- macro. Returns the folded and properly cast result. */ -+/* Fold an offsetof-like expression. EXPR is a nested sequence of component -+ references with an INDIRECT_REF of a constant at the bottom; much like the -+ traditional rendering of offsetof as a macro. Return the folded result. 
*/ - --static tree --fold_offsetof_1 (tree expr, tree stop_ref) -+tree -+fold_offsetof_1 (tree expr) - { -- enum tree_code code = PLUS_EXPR; - tree base, off, t; - -- if (expr == stop_ref && TREE_CODE (expr) != ERROR_MARK) -- return size_zero_node; -- - switch (TREE_CODE (expr)) - { - case ERROR_MARK: -@@ -8489,15 +8478,15 @@ - - case NOP_EXPR: - case INDIRECT_REF: -- if (!integer_zerop (TREE_OPERAND (expr, 0))) -+ if (!TREE_CONSTANT (TREE_OPERAND (expr, 0))) - { - error ("cannot apply % to a non constant address"); - return error_mark_node; - } -- return size_zero_node; -+ return TREE_OPERAND (expr, 0); - - case COMPONENT_REF: -- base = fold_offsetof_1 (TREE_OPERAND (expr, 0), stop_ref); -+ base = fold_offsetof_1 (TREE_OPERAND (expr, 0)); - if (base == error_mark_node) - return base; - -@@ -8515,21 +8504,14 @@ - break; - - case ARRAY_REF: -- base = fold_offsetof_1 (TREE_OPERAND (expr, 0), stop_ref); -+ base = fold_offsetof_1 (TREE_OPERAND (expr, 0)); - if (base == error_mark_node) - return base; - - t = TREE_OPERAND (expr, 1); -- if (TREE_CODE (t) == INTEGER_CST && tree_int_cst_sgn (t) < 0) -- { -- code = MINUS_EXPR; -- t = fold_build1_loc (input_location, NEGATE_EXPR, TREE_TYPE (t), t); -- } -- t = convert (sizetype, t); -- off = size_binop (MULT_EXPR, TYPE_SIZE_UNIT (TREE_TYPE (expr)), t); - - /* Check if the offset goes beyond the upper bound of the array. */ -- if (code == PLUS_EXPR && TREE_CODE (t) == INTEGER_CST) -+ if (TREE_CODE (t) == INTEGER_CST && tree_int_cst_sgn (t) >= 0) - { - tree upbound = array_ref_up_bound (expr); - if (upbound != NULL_TREE -@@ -8569,26 +8551,30 @@ - } - } - } -+ -+ t = convert (sizetype, t); -+ off = size_binop (MULT_EXPR, TYPE_SIZE_UNIT (TREE_TYPE (expr)), t); - break; - - case COMPOUND_EXPR: - /* Handle static members of volatile structs. 
*/ - t = TREE_OPERAND (expr, 1); - gcc_assert (TREE_CODE (t) == VAR_DECL); -- return fold_offsetof_1 (t, stop_ref); -+ return fold_offsetof_1 (t); - - default: - gcc_unreachable (); - } - -- return size_binop (code, base, off); -+ return fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, off); - } - -+/* Likewise, but convert it to the return type of offsetof. */ -+ - tree --fold_offsetof (tree expr, tree stop_ref) -+fold_offsetof (tree expr) - { -- /* Convert back from the internal sizetype to size_t. */ -- return convert (size_type_node, fold_offsetof_1 (expr, stop_ref)); -+ return convert (size_type_node, fold_offsetof_1 (expr)); - } - - /* Warn for A ?: C expressions (with B omitted) where A is a boolean ---- a/src/gcc/c-family/c-common.h -+++ b/src/gcc/c-family/c-common.h -@@ -916,7 +916,8 @@ - - extern void verify_sequence_points (tree); - --extern tree fold_offsetof (tree, tree); -+extern tree fold_offsetof_1 (tree); -+extern tree fold_offsetof (tree); - - /* Places where an lvalue, or modifiable lvalue, may be required. 
- Used to select diagnostic messages in lvalue_error and ---- a/src/gcc/c-parser.c -+++ b/src/gcc/c-parser.c -@@ -6294,7 +6294,7 @@ - c_parser_error (parser, "expected identifier"); - c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, - "expected %<)%>"); -- expr.value = fold_offsetof (offsetof_ref, NULL_TREE); -+ expr.value = fold_offsetof (offsetof_ref); - } - break; - case RID_CHOOSE_EXPR: ---- a/src/gcc/c-typeck.c -+++ b/src/gcc/c-typeck.c -@@ -3802,11 +3802,7 @@ - if (val && TREE_CODE (val) == INDIRECT_REF - && TREE_CONSTANT (TREE_OPERAND (val, 0))) - { -- tree op0 = fold_convert_loc (location, sizetype, -- fold_offsetof (arg, val)), op1; -- -- op1 = fold_convert_loc (location, argtype, TREE_OPERAND (val, 0)); -- ret = fold_build2_loc (location, POINTER_PLUS_EXPR, argtype, op1, op0); -+ ret = fold_convert_loc (location, argtype, fold_offsetof_1 (arg)); - goto return_build_unary_op; - } - ---- a/src/gcc/calls.c -+++ b/src/gcc/calls.c -@@ -686,7 +686,7 @@ - /* If the value is a non-legitimate constant, force it into a - pseudo now. TLS symbols sometimes need a call to resolve. */ - if (CONSTANT_P (args[i].value) -- && !LEGITIMATE_CONSTANT_P (args[i].value)) -+ && !targetm.legitimate_constant_p (args[i].mode, args[i].value)) - args[i].value = force_reg (args[i].mode, args[i].value); - - /* If we are to promote the function arg to a wider mode, -@@ -3449,7 +3449,8 @@ - - /* Make sure it is a reasonable operand for a move or push insn. */ - if (!REG_P (addr) && !MEM_P (addr) -- && ! (CONSTANT_P (addr) && LEGITIMATE_CONSTANT_P (addr))) -+ && !(CONSTANT_P (addr) -+ && targetm.legitimate_constant_p (Pmode, addr))) - addr = force_operand (addr, NULL_RTX); - - argvec[count].value = addr; -@@ -3490,7 +3491,7 @@ + argvec[count].value = addr; +@@ -3619,7 +3620,7 @@ /* Make sure it is a reasonable operand for a move or push insn. 
*/ if (!REG_P (val) && !MEM_P (val) @@ -3873,40 +4030,6 @@ val = force_operand (val, NULL_RTX); if (pass_by_reference (&args_so_far, mode, NULL_TREE, 1)) ---- a/src/gcc/cfgcleanup.c -+++ b/src/gcc/cfgcleanup.c -@@ -1969,7 +1969,14 @@ - - cond = get_condition (jump, &move_before, true, false); - if (cond == NULL_RTX) -- move_before = jump; -+ { -+#ifdef HAVE_cc0 -+ if (reg_mentioned_p (cc0_rtx, jump)) -+ move_before = prev_nonnote_nondebug_insn (jump); -+ else -+#endif -+ move_before = jump; -+ } - - for (ix = 0; ix < nedges; ix++) - if (EDGE_SUCC (bb, ix)->dest == EXIT_BLOCK_PTR) -@@ -2131,7 +2138,14 @@ - jump = BB_END (final_dest_bb); - cond = get_condition (jump, &move_before, true, false); - if (cond == NULL_RTX) -- move_before = jump; -+ { -+#ifdef HAVE_cc0 -+ if (reg_mentioned_p (cc0_rtx, jump)) -+ move_before = prev_nonnote_nondebug_insn (jump); -+ else -+#endif -+ move_before = jump; -+ } - } - - do --- a/src/gcc/cfgexpand.c +++ b/src/gcc/cfgexpand.c @@ -1843,12 +1843,17 @@ @@ -3959,7 +4082,7 @@ static rtx simplify_if_then_else (rtx); static rtx simplify_set (rtx); static rtx simplify_logical (rtx); -@@ -3089,7 +3089,7 @@ +@@ -3130,7 +3130,7 @@ /* It is possible that the source of I2 or I1 may be performing an unneeded operation, such as a ZERO_EXTEND of something that is known to have the high part zero. Handle that case @@ -3968,7 +4091,7 @@ Another way to do this would be to have a function that tries to simplify a single insn instead of merging two or more -@@ -3112,13 +3112,11 @@ +@@ -3153,13 +3153,11 @@ if (i1) { subst_low_luid = DF_INSN_LUID (i1); @@ -3986,7 +4109,7 @@ } n_occurrences = 0; /* `subst' counts here */ -@@ -3129,7 +3127,7 @@ +@@ -3170,7 +3168,7 @@ self-referential RTL when we will be substituting I1SRC for I1DEST later. Likewise if I0 feeds into I2, either directly or indirectly through I1, and I0DEST is in I0SRC. 
*/ @@ -3995,7 +4118,7 @@ (i1_feeds_i2_n && i1dest_in_i1src) || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n)) && i0dest_in_i0src)); -@@ -3168,7 +3166,7 @@ +@@ -3214,7 +3212,7 @@ copy of I1SRC each time we substitute it, in order to avoid creating self-referential RTL when we will be substituting I0SRC for I0DEST later. */ @@ -4004,7 +4127,7 @@ i0_feeds_i1_n && i0dest_in_i0src); substed_i1 = 1; -@@ -3198,7 +3196,7 @@ +@@ -3248,7 +3246,7 @@ n_occurrences = 0; subst_low_luid = DF_INSN_LUID (i0); @@ -4013,16 +4136,16 @@ substed_i0 = 1; } -@@ -3260,7 +3258,7 @@ +@@ -3310,7 +3308,7 @@ { rtx t = i1pat; if (i0_feeds_i1_n) -- t = subst (t, i0dest, i0src, 0, 0); -+ t = subst (t, i0dest, i0src, 0, 0, 0); +- t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0); ++ t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0, 0); XVECEXP (newpat, 0, --total_sets) = t; } -@@ -3268,10 +3266,10 @@ +@@ -3318,10 +3316,10 @@ { rtx t = i2pat; if (i1_feeds_i2_n) @@ -4030,12 +4153,12 @@ + t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0, i0_feeds_i1_n && i0dest_in_i0src); if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n) -- t = subst (t, i0dest, i0src, 0, 0); -+ t = subst (t, i0dest, i0src, 0, 0, 0); +- t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0); ++ t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0, 0); XVECEXP (newpat, 0, --total_sets) = t; } -@@ -4943,11 +4941,13 @@ +@@ -4998,11 +4996,13 @@ IN_DEST is nonzero if we are processing the SET_DEST of a SET. @@ -4050,7 +4173,7 @@ { enum rtx_code code = GET_CODE (x); enum machine_mode op0_mode = VOIDmode; -@@ -5008,7 +5008,7 @@ +@@ -5063,7 +5063,7 @@ && GET_CODE (XVECEXP (x, 0, 0)) == SET && GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS) { @@ -4059,7 +4182,7 @@ /* If this substitution failed, this whole thing fails. 
*/ if (GET_CODE (new_rtx) == CLOBBER -@@ -5025,7 +5025,7 @@ +@@ -5080,7 +5080,7 @@ && GET_CODE (dest) != CC0 && GET_CODE (dest) != PC) { @@ -4068,7 +4191,7 @@ /* If this substitution failed, this whole thing fails. */ if (GET_CODE (new_rtx) == CLOBBER -@@ -5071,8 +5071,8 @@ +@@ -5126,8 +5126,8 @@ } else { @@ -4079,7 +4202,7 @@ /* If this substitution failed, this whole thing fails. */ -@@ -5149,7 +5149,9 @@ +@@ -5204,7 +5204,9 @@ && (code == SUBREG || code == STRICT_LOW_PART || code == ZERO_EXTRACT)) || code == SET) @@ -4090,7 +4213,7 @@ /* If we found that we will have to reject this combination, indicate that by returning the CLOBBER ourselves, rather than -@@ -5206,7 +5208,7 @@ +@@ -5261,7 +5263,7 @@ /* If X is sufficiently simple, don't bother trying to do anything with it. */ if (code != CONST_INT && code != REG && code != CLOBBER) @@ -4099,7 +4222,7 @@ if (GET_CODE (x) == code) break; -@@ -5226,10 +5228,12 @@ +@@ -5281,10 +5283,12 @@ expression. OP0_MODE is the original mode of XEXP (x, 0). IN_DEST is nonzero @@ -4114,7 +4237,7 @@ { enum rtx_code code = GET_CODE (x); enum machine_mode mode = GET_MODE (x); -@@ -5284,8 +5288,8 @@ +@@ -5339,8 +5343,8 @@ false arms to store-flag values. Be careful to use copy_rtx here since true_rtx or false_rtx might share RTL with x as a result of the if_then_else_cond call above. */ @@ -4125,7 +4248,7 @@ /* If true_rtx and false_rtx are not general_operands, an if_then_else is unlikely to be simpler. */ -@@ -5629,7 +5633,7 @@ +@@ -5684,7 +5688,7 @@ { /* Try to simplify the expression further. */ rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1)); @@ -4134,7 +4257,7 @@ /* If we could, great. If not, do not go ahead with the IOR replacement, since PLUS appears in many special purpose -@@ -5722,7 +5726,16 @@ +@@ -5777,7 +5781,16 @@ ZERO_EXTRACT is indeed appropriate, it will be placed back by the call to make_compound_operation in the SET case. 
*/ @@ -4152,7 +4275,7 @@ && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT && op1 == const0_rtx && mode == GET_MODE (op0) -@@ -5768,7 +5781,10 @@ +@@ -5823,7 +5836,10 @@ /* If STORE_FLAG_VALUE is -1, we have cases similar to those above. */ @@ -4164,7 +4287,7 @@ && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT && op1 == const0_rtx && (num_sign_bit_copies (op0, mode) -@@ -5966,11 +5982,11 @@ +@@ -6021,11 +6037,11 @@ if (reg_mentioned_p (from, true_rtx)) true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code, from, true_val), @@ -4178,7 +4301,7 @@ SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx); SUBST (XEXP (x, 2), swapped ? true_rtx : false_rtx); -@@ -6187,11 +6203,11 @@ +@@ -6242,11 +6258,11 @@ { temp = subst (simplify_gen_relational (true_code, m, VOIDmode, cond_op0, cond_op1), @@ -4192,7 +4315,7 @@ temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp); if (extend_op != UNKNOWN) -@@ -6271,10 +6287,18 @@ +@@ -6326,10 +6342,18 @@ enum rtx_code new_code; rtx op0, op1, tmp; int other_changed = 0; @@ -4212,7 +4335,7 @@ else op0 = src, op1 = CONST0_RTX (GET_MODE (src)); -@@ -6316,6 +6340,12 @@ +@@ -6371,6 +6395,12 @@ need to use a different CC mode here. 
*/ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) compare_mode = GET_MODE (op0); @@ -4225,131 +4348,28 @@ else compare_mode = SELECT_CC_MODE (new_code, op0, op1); ---- a/src/gcc/config/arm/arm-cores.def -+++ b/src/gcc/config/arm/arm-cores.def -@@ -70,10 +70,10 @@ - /* V4 Architecture Processors */ - ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) - ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) --ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) --ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) --ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) --ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) -+ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) -+ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) -+ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) -+ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) - ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul) - ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul) - -@@ -122,15 +122,18 @@ - ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) - ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) - ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) --ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) --ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e) --ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) --ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) -+ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) -+ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) -+ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) -+ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) -+ARM_CORE("cortex-a8", cortexa8, 7A, 
FL_LDSCHED, cortex) - ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) --ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e) --ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) --ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) --ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e) --ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) --ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) --ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e) -+ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) -+ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) -+ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) -+ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) -+ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) -+ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) -+ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) -+ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) +--- a/src/gcc/common.opt ++++ b/src/gcc/common.opt +@@ -1617,6 +1617,19 @@ + Common Report Var(flag_sched_pressure) Init(0) Optimization + Enable register pressure sensitive insn scheduling + ++fsched-pressure-algorithm= ++Common Joined RejectNegative Enum(sched_pressure_algorithm) Var(flag_sched_pressure_algorithm) Init(SCHED_PRESSURE_WEIGHTED) ++-fira-algorithm=[CB|priority] Set the used IRA algorithm + ---- a/src/gcc/config/arm/arm-protos.h -+++ b/src/gcc/config/arm/arm-protos.h -@@ -46,6 +46,7 @@ - extern bool arm_small_register_classes_for_mode_p (enum machine_mode); - extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); - extern int const_ok_for_arm (HOST_WIDE_INT); -+extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); - extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, - HOST_WIDE_INT, rtx, rtx, int); - extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *); -@@ -58,14 +59,19 @@ - int); - extern rtx thumb_legitimize_reload_address (rtx *, enum 
machine_mode, int, int, - int); -+extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int); - extern int arm_const_double_rtx (rtx); - extern int neg_const_double_rtx_ok_for_fpa (rtx); - extern int vfp3_const_double_rtx (rtx); - extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *); - extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, - int *); -+extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *, -+ int *, bool); - extern char *neon_output_logic_immediate (const char *, rtx *, - enum machine_mode, int, int); -+extern char *neon_output_shift_immediate (const char *, char, rtx *, -+ enum machine_mode, int, bool); - extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, - rtx (*) (rtx, rtx, rtx)); - extern rtx neon_make_constant (rtx); -@@ -81,7 +87,6 @@ - extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, - bool); - extern bool arm_tls_referenced_p (rtx); --extern bool arm_cannot_force_const_mem (rtx); - - extern int cirrus_memory_offset (rtx); - extern int arm_coproc_mem_operand (rtx, bool); -@@ -152,6 +157,7 @@ - extern const char *arm_output_memory_barrier (rtx *); - extern const char *arm_output_sync_insn (rtx, rtx *); - extern unsigned int arm_sync_loop_insns (rtx , rtx *); -+extern int arm_attr_length_push_multi(rtx, rtx); - - #if defined TREE_CODE - extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); -@@ -175,6 +181,7 @@ - #endif - extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); - #ifdef RTX_CODE -+extern enum arm_cond_code maybe_get_arm_condition_code (rtx); - extern void thumb1_final_prescan_insn (rtx); - extern void thumb2_final_prescan_insn (rtx); - extern const char *thumb_load_double_from_address (rtx *); -@@ -220,9 +227,14 @@ - bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); - bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); - int constant_limit; -+ /* Maximum number of instructions to 
conditionalise in -+ arm_final_prescan_insn. */ -+ int max_insns_skipped; - int num_prefetch_slots; - int l1_cache_size; - int l1_cache_line_size; -+ bool prefer_constant_pool; -+ int (*branch_cost) (bool, bool); - }; - - extern const struct tune_params *current_tune; ---- a/src/gcc/config/arm/arm-tune.md -+++ b/src/gcc/config/arm/arm-tune.md -@@ -1,5 +1,5 @@ - ;; -*- buffer-read-only: t -*- - ;; Generated automatically by gentune.sh from arm-cores.def - (define_attr "tune" -- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0" -+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" - (const (symbol_ref "((enum attr_tune) arm_tune)"))) ++Enum ++Name(sched_pressure_algorithm) Type(enum sched_pressure_algorithm) UnknownError(unknown % algorithm %qs) ++ 
++EnumValue ++Enum(sched_pressure_algorithm) String(weighted) Value(SCHED_PRESSURE_WEIGHTED) ++ ++EnumValue ++Enum(sched_pressure_algorithm) String(model) Value(SCHED_PRESSURE_MODEL) ++ + fsched-spec + Common Report Var(flag_schedule_speculative) Init(1) Optimization + Allow speculative motion of non-loads --- a/src/gcc/config/arm/arm.c +++ b/src/gcc/config/arm/arm.c @@ -63,6 +63,11 @@ @@ -4412,8 +4432,8 @@ + unsigned HOST_WIDE_INT); static enum machine_mode arm_preferred_simd_mode (enum machine_mode); static bool arm_class_likely_spilled_p (reg_class_t); - static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); -@@ -250,6 +265,9 @@ + static HOST_WIDE_INT arm_vector_alignment (const_tree type); +@@ -251,6 +266,9 @@ bool is_packed); static void arm_conditional_register_usage (void); static reg_class_t arm_preferred_rename_class (reg_class_t rclass); @@ -4423,7 +4443,19 @@ /* Table of machine attributes. */ -@@ -393,8 +411,13 @@ +@@ -294,6 +312,11 @@ + /* Set default optimization options. */ + static const struct default_options arm_option_optimization_table[] = + { ++ /* Enable -fsched-pressure using -fsched-pressure-algorithm=model ++ by default when optimizing. */ ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, ++ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure_algorithm_, ++ NULL, SCHED_PRESSURE_MODEL }, + /* Enable section anchors by default at -O1 or higher. 
*/ + { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, +@@ -394,8 +417,13 @@ #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p @@ -4437,7 +4469,7 @@ #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg -@@ -403,6 +426,8 @@ +@@ -404,6 +432,8 @@ #define TARGET_INIT_BUILTINS arm_init_builtins #undef TARGET_EXPAND_BUILTIN #define TARGET_EXPAND_BUILTIN arm_expand_builtin @@ -4446,7 +4478,7 @@ #undef TARGET_INIT_LIBFUNCS #define TARGET_INIT_LIBFUNCS arm_init_libfuncs -@@ -519,6 +544,9 @@ +@@ -520,6 +550,9 @@ #undef TARGET_HAVE_CONDITIONAL_EXECUTION #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution @@ -4456,7 +4488,7 @@ #undef TARGET_CANNOT_FORCE_CONST_MEM #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem -@@ -659,12 +687,13 @@ +@@ -663,12 +696,13 @@ #define FL_THUMB2 (1 << 16) /* Thumb-2. */ #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' profile. */ @@ -4471,7 +4503,7 @@ #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ -@@ -691,8 +720,8 @@ +@@ -695,8 +729,8 @@ #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) @@ -4482,7 +4514,7 @@ #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) /* The bits in this mask specify which -@@ -778,7 +807,8 @@ +@@ -782,7 +816,8 @@ int arm_arch_thumb2; /* Nonzero if chip supports integer division instruction. 
*/ @@ -4492,7 +4524,7 @@ /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we must report the mode of the memory reference from -@@ -851,48 +881,117 @@ +@@ -855,48 +890,117 @@ { arm_slowmul_rtx_costs, NULL, @@ -4622,7 +4654,7 @@ }; -@@ -1698,7 +1797,8 @@ +@@ -1702,7 +1806,8 @@ arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; @@ -4632,7 +4664,7 @@ arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; /* If we are not using the default (ARM mode) section anchor offset -@@ -1965,6 +2065,28 @@ +@@ -1969,6 +2074,28 @@ fix_cm3_ldrd = 0; } @@ -4661,7 +4693,7 @@ if (TARGET_THUMB1 && flag_schedule_insns) { /* Don't warn since it's on by default in -O2. */ -@@ -1978,12 +2100,7 @@ +@@ -1982,12 +2109,7 @@ max_insns_skipped = 6; } else @@ -4675,7 +4707,7 @@ /* Hot/Cold partitioning is not currently supported, since we can't handle literal pool placement in that case. */ -@@ -2440,7 +2557,7 @@ +@@ -2445,7 +2567,7 @@ } /* Return true if I is a valid constant for the operation CODE. */ @@ -4684,7 +4716,7 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) { if (const_ok_for_arm (i)) -@@ -2448,7 +2565,21 @@ +@@ -2453,7 +2575,21 @@ switch (code) { @@ -4706,7 +4738,7 @@ case COMPARE: case EQ: case NE: -@@ -2564,68 +2695,41 @@ +@@ -2569,68 +2705,41 @@ 1); } @@ -4799,7 +4831,7 @@ } } -@@ -2652,13 +2756,161 @@ +@@ -2657,13 +2766,161 @@ the constant starting from `best_start', and also starting from zero (i.e. with bit 31 first to be output). If `best_start' doesn't yield a shorter sequence, we may as well use zero. */ @@ -4966,7 +4998,7 @@ } /* Emit an instruction with the indicated PATTERN. If COND is -@@ -2675,7 +2927,6 @@ +@@ -2680,7 +2937,6 @@ /* As above, but extra parameter GENERATE which, if clear, suppresses RTL generation. 
*/ @@ -4974,7 +5006,7 @@ static int arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, -@@ -2687,15 +2938,15 @@ +@@ -2692,15 +2948,15 @@ int final_invert = 0; int can_negate_initial = 0; int i; @@ -4993,7 +5025,7 @@ /* Find out which operations are safe for a given CODE. Also do a quick check for degenerate cases; these can occur when DImode operations -@@ -2704,7 +2955,6 @@ +@@ -2709,7 +2965,6 @@ { case SET: can_invert = 1; @@ -5001,7 +5033,7 @@ break; case PLUS: -@@ -2732,9 +2982,6 @@ +@@ -2737,9 +2992,6 @@ gen_rtx_SET (VOIDmode, target, source)); return 1; } @@ -5011,7 +5043,7 @@ break; case AND: -@@ -2776,6 +3023,7 @@ +@@ -2781,6 +3033,7 @@ gen_rtx_NOT (mode, source))); return 1; } @@ -5019,7 +5051,7 @@ break; case MINUS: -@@ -2798,7 +3046,6 @@ +@@ -2803,7 +3056,6 @@ source))); return 1; } @@ -5027,7 +5059,7 @@ break; -@@ -2807,9 +3054,7 @@ +@@ -2812,9 +3064,7 @@ } /* If we can do it in one insn get out quickly. */ @@ -5038,7 +5070,7 @@ { if (generate) emit_constant_insn (cond, -@@ -2862,15 +3107,6 @@ +@@ -2867,15 +3117,6 @@ switch (code) { case SET: @@ -5054,7 +5086,7 @@ /* See if we can do this by sign_extending a constant that is known to be negative. This is a good, way of doing it, since the shift may well merge into a subsequent insn. */ -@@ -3221,121 +3457,97 @@ +@@ -3226,121 +3467,97 @@ break; } @@ -5252,7 +5284,59 @@ if (final_invert) { -@@ -5916,7 +6128,7 @@ +@@ -4119,6 +4336,11 @@ + (TARGET_VFP_DOUBLE || !is_double)); + } + ++/* Return true if an argument whose type is TYPE, or mode is MODE, is ++ suitable for passing or returning in VFP registers for the PCS ++ variant selected. If it is, then *BASE_MODE is updated to contain ++ a machine mode describing each element of the argument's type and ++ *COUNT to hold the number of such elements. 
*/ + static bool + aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type, +@@ -4126,9 +4348,20 @@ + { + enum machine_mode new_mode = VOIDmode; + +- if (GET_MODE_CLASS (mode) == MODE_FLOAT +- || GET_MODE_CLASS (mode) == MODE_VECTOR_INT +- || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) ++ /* If we have the type information, prefer that to working things ++ out from the mode. */ ++ if (type) ++ { ++ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); ++ ++ if (ag_count > 0 && ag_count <= 4) ++ *count = ag_count; ++ else ++ return false; ++ } ++ else if (GET_MODE_CLASS (mode) == MODE_FLOAT ++ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT ++ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + *count = 1; + new_mode = mode; +@@ -4138,15 +4371,6 @@ + *count = 2; + new_mode = (mode == DCmode ? DFmode : SFmode); + } +- else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE)) +- { +- int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); +- +- if (ag_count > 0 && ag_count <= 4) +- *count = ag_count; +- else +- return false; +- } + else + return false; + +@@ -5950,7 +6174,7 @@ addresses based on the frame pointer or arg pointer until the reload pass starts. This is so that eliminating such addresses into stack based ones won't produce impossible code. */ @@ -5261,7 +5345,32 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) { /* ??? Not clear if this is right. Experiment. */ -@@ -6406,23 +6618,134 @@ +@@ -6432,31 +6656,159 @@ + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) + { ++ /* We must recognize output that we have already generated ourselves. 
*/ ++ if (GET_CODE (*p) == PLUS ++ && GET_CODE (XEXP (*p, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG ++ && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT ++ && GET_CODE (XEXP (*p, 1)) == CONST_INT) ++ { ++ push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, ++ MODE_BASE_REG_CLASS (mode), GET_MODE (*p), ++ VOIDmode, 0, 0, opnum, (enum reload_type) type); ++ return true; ++ } ++ + if (GET_CODE (*p) == PLUS + && GET_CODE (XEXP (*p, 0)) == REG + && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) ++ /* If the base register is equivalent to a constant, let the generic ++ code handle it. Otherwise we will run into problems if a future ++ reload pass decides to rematerialize the constant. */ ++ && !reg_equiv_constant [ORIGINAL_REGNO (XEXP (*p, 0))] + && GET_CODE (XEXP (*p, 1)) == CONST_INT) + { HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); HOST_WIDE_INT low, high; @@ -5413,7 +5522,7 @@ else return false; -@@ -6535,9 +6858,47 @@ +@@ -6569,9 +6921,47 @@ return for_each_rtx (&x, arm_tls_operand_p_1, NULL); } @@ -5462,17 +5571,17 @@ arm_cannot_force_const_mem (rtx x) { rtx base, offset; -@@ -7233,6 +7594,9 @@ +@@ -7267,6 +7657,9 @@ *total = COSTS_N_INSNS (4); return true; + case SET: + return false; + - default: - *total = COSTS_N_INSNS (4); - return false; -@@ -7580,6 +7944,9 @@ + case UNSPEC: + /* We cost this as high as our memory costs to allow this to + be hoisted from loops. */ +@@ -7623,6 +8016,9 @@ *total = COSTS_N_INSNS (1) + 1; return true; @@ -5482,7 +5591,7 @@ default: if (mode != VOIDmode) *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -@@ -8160,6 +8527,21 @@ +@@ -8203,6 +8599,21 @@ return cost; } @@ -5504,52 +5613,40 @@ static int fp_consts_inited = 0; /* Only zero is valid for VFP. Other values are also valid for FPA. */ -@@ -8617,24 +8999,106 @@ +@@ -8660,7 +9071,67 @@ return 1; } -/* Return a string suitable for output of Neon immediate logic operation -- MNEM. */ +/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. 
If + the immediate is valid, write a constant suitable for using as an operand + to VSHR/VSHL to *MODCONST and the corresponding element width to + *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift, + because they have different limitations. */ - --char * --neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode, -- int inverse, int quad) ++ +int +neon_immediate_valid_for_shift (rtx op, enum machine_mode mode, + rtx *modconst, int *elementwidth, + bool isleftshift) - { -- int width, is_valid; -- static char templ[40]; ++{ + unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); + unsigned int n_elts = CONST_VECTOR_NUNITS (op), i; + unsigned HOST_WIDE_INT last_elt = 0; + unsigned HOST_WIDE_INT maxshift; - -- is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width); ++ + /* Split vector constant out into a byte vector. */ + for (i = 0; i < n_elts; i++) + { + rtx el = CONST_VECTOR_ELT (op, i); + unsigned HOST_WIDE_INT elpart; - -- gcc_assert (is_valid != 0); ++ + if (GET_CODE (el) == CONST_INT) + elpart = INTVAL (el); + else if (GET_CODE (el) == CONST_DOUBLE) + return 0; + else + gcc_unreachable (); - -- if (quad) -- sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width); -- else -- sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width); ++ + if (i != 0 && elpart != last_elt) + return 0; + @@ -5582,27 +5679,13 @@ +} + +/* Return a string suitable for output of Neon immediate logic operation -+ MNEM. */ -+ -+char * -+neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode, -+ int inverse, int quad) -+{ -+ int width, is_valid; -+ static char templ[40]; -+ -+ is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width); -+ -+ gcc_assert (is_valid != 0); -+ -+ if (quad) -+ sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width); -+ else -+ sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width); -+ -+ return templ; -+} -+ + MNEM. 
*/ + + char * +@@ -8682,6 +9153,28 @@ + return templ; + } + +/* Return a string suitable for output of Neon immediate shift operation + (VSHR or VSHL) MNEM. */ + @@ -5621,10 +5704,14 @@ + sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width); + else + sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width); - - return templ; - } -@@ -9111,6 +9575,11 @@ ++ ++ return templ; ++} ++ + /* Output a sequence of pairwise operations to implement a reduction. + NOTE: We do "too much work" here, because pairwise operations work on two + registers-worth of operands in one go. Unfortunately we can't exploit those +@@ -9154,6 +9647,11 @@ if (GET_CODE (ind) == REG) return arm_address_register_rtx_p (ind, 0); @@ -5636,7 +5723,7 @@ return FALSE; } -@@ -9139,11 +9608,14 @@ +@@ -9182,11 +9680,14 @@ return GENERAL_REGS; } @@ -5654,28 +5741,50 @@ if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) return NO_REGS; -@@ -9812,6 +10284,9 @@ - rtx base_reg_rtx = NULL; - int i, stm_case; - -+ /* Write back of base register is currently only supported for Thumb 1. */ -+ int base_writeback = TARGET_THUMB1; -+ - /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be - easily extended if required. */ - gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); -@@ -9869,7 +10344,9 @@ - /* If it isn't an integer register, then we can't do this. */ - if (unsorted_regs[i] < 0 - || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) -- || (TARGET_THUMB2 && unsorted_regs[i] == base_reg) -+ /* The effects are unpredictable if the base register is -+ both updated and stored. */ -+ || (base_writeback && unsorted_regs[i] == base_reg) - || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM) - || unsorted_regs[i] > 14) - return 0; -@@ -10331,6 +10808,335 @@ +@@ -9501,6 +10002,42 @@ + } + } + ++/* Match pair of min/max operators that can be implemented via usat/ssat. 
*/ ++ ++bool ++arm_sat_operator_match (rtx lo_bound, rtx hi_bound, ++ int *mask, bool *signed_sat) ++{ ++ /* The high bound must be a power of two minus one. */ ++ int log = exact_log2 (INTVAL (hi_bound) + 1); ++ if (log == -1) ++ return false; ++ ++ /* The low bound is either zero (for usat) or one less than the ++ negation of the high bound (for ssat). */ ++ if (INTVAL (lo_bound) == 0) ++ { ++ if (mask) ++ *mask = log; ++ if (signed_sat) ++ *signed_sat = false; ++ ++ return true; ++ } ++ ++ if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1) ++ { ++ if (mask) ++ *mask = log + 1; ++ if (signed_sat) ++ *signed_sat = true; ++ ++ return true; ++ } ++ ++ return false; ++} ++ + /* Return 1 if memory locations are adjacent. */ + int + adjacent_mem_locations (rtx a, rtx b) +@@ -10380,6 +10917,335 @@ return true; } @@ -6011,7 +6120,7 @@ int arm_gen_movmemqi (rtx *operands) { -@@ -10343,8 +11149,13 @@ +@@ -10392,8 +11258,13 @@ if (GET_CODE (operands[2]) != CONST_INT || GET_CODE (operands[3]) != CONST_INT @@ -6027,7 +6136,25 @@ return 0; dstbase = operands[0]; -@@ -11433,6 +12244,19 @@ +@@ -10821,7 +11692,7 @@ + return CC_Zmode; + + /* We can do an equality test in three Thumb instructions. */ +- if (!TARGET_ARM) ++ if (!TARGET_32BIT) + return CC_Zmode; + + /* FALLTHROUGH */ +@@ -10833,7 +11704,7 @@ + /* DImode unsigned comparisons can be implemented by cmp + + cmpeq without a scratch register. Not worth doing in + Thumb-2. */ +- if (TARGET_ARM) ++ if (TARGET_32BIT) + return CC_CZmode; + + /* FALLTHROUGH */ +@@ -11482,6 +12353,19 @@ return 0; } @@ -6047,7 +6174,7 @@ /* Move a minipool fix MP from its current location to before MAX_MP. If MAX_MP is NULL, then MP doesn't need moving, but the addressing constraints may need updating. */ -@@ -11979,8 +12803,12 @@ +@@ -12028,8 +12912,12 @@ within range. */ gcc_assert (GET_CODE (from) != BARRIER); @@ -6062,7 +6189,7 @@ /* If there is a jump table, add its length. 
*/ tmp = is_jump_table (from); -@@ -12400,6 +13228,11 @@ +@@ -12449,6 +13337,11 @@ insn = table; } } @@ -6074,32 +6201,37 @@ } fix = minipool_fix_head; -@@ -16591,7 +17424,7 @@ +@@ -16640,7 +17533,8 @@ { rtx addr; bool postinc = FALSE; - unsigned align, modesize, align_bits; + unsigned align, memsize, align_bits; ++ rtx memsize_rtx; gcc_assert (GET_CODE (x) == MEM); addr = XEXP (x, 0); -@@ -16606,12 +17439,12 @@ +@@ -16655,14 +17549,15 @@ instruction (for some alignments) as an aid to the memory subsystem of the target. */ align = MEM_ALIGN (x) >> 3; - modesize = GET_MODE_SIZE (GET_MODE (x)); -+ memsize = INTVAL (MEM_SIZE (x)); ++ memsize_rtx = MEM_SIZE (x); ++ memsize = memsize_rtx ? INTVAL (memsize_rtx) : 0; /* Only certain alignment specifiers are supported by the hardware. */ - if (modesize == 16 && (align % 32) == 0) + if (memsize == 16 && (align % 32) == 0) align_bits = 256; - else if ((modesize == 8 || modesize == 16) && (align % 16) == 0) -+ else if ((memsize == 8 || memsize == 16) && (align % 16) == 0) ++ else if (memsize == 16 && (align % 16) == 0) align_bits = 128; - else if ((align % 8) == 0) +- else if ((align % 8) == 0) ++ else if (memsize >= 8 && (align % 8) == 0) align_bits = 64; -@@ -16663,6 +17496,11 @@ + else + align_bits = 0; +@@ -16712,6 +17607,11 @@ } return; @@ -6111,7 +6243,7 @@ /* Register specifier for vld1.16/vst1.16. Translate the S register number into a D register number and element index. */ case 'z': -@@ -17022,10 +17860,10 @@ +@@ -17071,10 +17971,10 @@ decremented/zeroed by arm_asm_output_opcode as the insns are output. 
*/ /* Returns the index of the ARM condition code string in @@ -6126,7 +6258,7 @@ { enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); enum arm_cond_code code; -@@ -17049,11 +17887,11 @@ +@@ -17098,11 +17998,11 @@ case CC_DLTUmode: code = ARM_CC; dominance: @@ -6141,7 +6273,7 @@ case CC_NOOVmode: switch (comp_code) -@@ -17062,7 +17900,7 @@ +@@ -17111,7 +18011,7 @@ case EQ: return ARM_EQ; case GE: return ARM_PL; case LT: return ARM_MI; @@ -6150,7 +6282,7 @@ } case CC_Zmode: -@@ -17070,7 +17908,7 @@ +@@ -17119,7 +18019,7 @@ { case NE: return ARM_NE; case EQ: return ARM_EQ; @@ -6159,7 +6291,7 @@ } case CC_Nmode: -@@ -17078,7 +17916,7 @@ +@@ -17127,7 +18027,7 @@ { case NE: return ARM_MI; case EQ: return ARM_PL; @@ -6168,7 +6300,7 @@ } case CCFPEmode: -@@ -17103,7 +17941,7 @@ +@@ -17152,7 +18052,7 @@ /* UNEQ and LTGT do not have a representation. */ case UNEQ: /* Fall through. */ case LTGT: /* Fall through. */ @@ -6177,7 +6309,7 @@ } case CC_SWPmode: -@@ -17119,7 +17957,7 @@ +@@ -17168,7 +18068,7 @@ case GTU: return ARM_CC; case LEU: return ARM_CS; case LTU: return ARM_HI; @@ -6186,7 +6318,7 @@ } case CC_Cmode: -@@ -17127,7 +17965,7 @@ +@@ -17176,7 +18076,7 @@ { case LTU: return ARM_CS; case GEU: return ARM_CC; @@ -6195,7 +6327,7 @@ } case CC_CZmode: -@@ -17139,7 +17977,7 @@ +@@ -17188,7 +18088,7 @@ case GTU: return ARM_HI; case LEU: return ARM_LS; case LTU: return ARM_CC; @@ -6204,7 +6336,7 @@ } case CC_NCVmode: -@@ -17149,7 +17987,7 @@ +@@ -17198,7 +18098,7 @@ case LT: return ARM_LT; case GEU: return ARM_CS; case LTU: return ARM_CC; @@ -6213,7 +6345,7 @@ } case CCmode: -@@ -17165,13 +18003,22 @@ +@@ -17214,13 +18114,22 @@ case GTU: return ARM_HI; case LEU: return ARM_LS; case LTU: return ARM_CC; @@ -6237,7 +6369,7 @@ /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions. 
*/ void -@@ -17783,926 +18630,618 @@ +@@ -17832,920 +18741,612 @@ return value; } @@ -7052,18 +7184,23 @@ - tree_cons (NULL_TREE, - V4HI_type_node, - endlink)))); -- -- tree di_ftype_v4hi_v4hi -- = build_function_type (long_long_unsigned_type_node, -- tree_cons (NULL_TREE, V4HI_type_node, -- tree_cons (NULL_TREE, V4HI_type_node, -- endlink))); + tree intQI_pointer_node; + tree intHI_pointer_node; + tree intSI_pointer_node; + tree intDI_pointer_node; + tree float_pointer_node; +- tree di_ftype_v4hi_v4hi +- = build_function_type (long_long_unsigned_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- tree_cons (NULL_TREE, V4HI_type_node, +- endlink))); ++ tree const_intQI_node; ++ tree const_intHI_node; ++ tree const_intSI_node; ++ tree const_intDI_node; ++ tree const_float_node; + - /* Normal vector binops. */ - tree v8qi_ftype_v8qi_v8qi - = build_function_type (V8QI_type_node, @@ -7086,11 +7223,11 @@ - tree_cons (NULL_TREE, - long_long_unsigned_type_node, - endlink))); -+ tree const_intQI_node; -+ tree const_intHI_node; -+ tree const_intSI_node; -+ tree const_intDI_node; -+ tree const_float_node; ++ tree const_intQI_pointer_node; ++ tree const_intHI_pointer_node; ++ tree const_intSI_pointer_node; ++ tree const_intDI_pointer_node; ++ tree const_float_pointer_node; - /* Add all builtins that are more or less simple operations on two - operands. */ @@ -7100,14 +7237,6 @@ - mask-generating compares. 
*/ - enum machine_mode mode; - tree type; -+ tree const_intQI_pointer_node; -+ tree const_intHI_pointer_node; -+ tree const_intSI_pointer_node; -+ tree const_intDI_pointer_node; -+ tree const_float_pointer_node; - -- if (d->name == 0) -- continue; + tree V8QI_type_node; + tree V4HI_type_node; + tree V2SI_type_node; @@ -7118,12 +7247,19 @@ + tree V4SF_type_node; + tree V2DI_type_node; -- mode = insn_data[d->icode].operand[1].mode; +- if (d->name == 0) +- continue; + tree intUQI_type_node; + tree intUHI_type_node; + tree intUSI_type_node; + tree intUDI_type_node; +- mode = insn_data[d->icode].operand[1].mode; ++ tree intEI_type_node; ++ tree intOI_type_node; ++ tree intCI_type_node; ++ tree intXI_type_node; + - switch (mode) - { - case V8QImode: @@ -7138,14 +7274,6 @@ - case DImode: - type = di_ftype_di_di; - break; -+ tree intEI_type_node; -+ tree intOI_type_node; -+ tree intCI_type_node; -+ tree intXI_type_node; - -- default: -- gcc_unreachable (); -- } + tree V8QI_pointer_node; + tree V4HI_pointer_node; + tree V2SI_pointer_node; @@ -7156,8 +7284,9 @@ + tree V4SF_pointer_node; + tree V2DI_pointer_node; -- def_mbuiltin (d->mask, d->name, type, d->code); -- } +- default: +- gcc_unreachable (); +- } + tree void_ftype_pv8qi_v8qi_v8qi; + tree void_ftype_pv4hi_v4hi_v4hi; + tree void_ftype_pv2si_v2si_v2si; @@ -7169,6 +7298,12 @@ + tree void_ftype_pv4sf_v4sf_v4sf; + tree void_ftype_pv2di_v2di_v2di; +- def_mbuiltin (d->mask, d->name, type, d->code); +- } ++ tree reinterp_ftype_dreg[5][5]; ++ tree reinterp_ftype_qreg[5][5]; ++ tree dreg_types[5], qreg_types[5]; + - /* Add the remaining MMX insns with somewhat more complicated types. 
*/ - def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO); - def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX); @@ -7260,14 +7395,6 @@ - def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB); - def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT); -} -+ tree reinterp_ftype_dreg[5][5]; -+ tree reinterp_ftype_qreg[5][5]; -+ tree dreg_types[5], qreg_types[5]; - --static void --arm_init_tls_builtins (void) --{ -- tree ftype, decl; + /* Create distinguished type nodes for NEON vector element types, + and pointers to values of such types, so we can detect them later. */ + neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); @@ -7280,6 +7407,11 @@ + TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; + layout_type (neon_float_type_node); +-static void +-arm_init_tls_builtins (void) +-{ +- tree ftype, decl; +- - ftype = build_function_type (ptr_type_node, void_list_node); - decl = add_builtin_function ("__builtin_thread_pointer", ftype, - ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD, @@ -7287,24 +7419,7 @@ - TREE_NOTHROW (decl) = 1; - TREE_READONLY (decl) = 1; -} -+ /* Define typedefs which exactly correspond to the modes we are basing vector -+ types on. If you change these names you'll need to change -+ the table used by arm_mangle_type too. 
*/ -+ (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node, -+ "__builtin_neon_qi"); -+ (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, -+ "__builtin_neon_hi"); -+ (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, -+ "__builtin_neon_si"); -+ (*lang_hooks.types.register_builtin_type) (neon_float_type_node, -+ "__builtin_neon_sf"); -+ (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node, -+ "__builtin_neon_di"); -+ (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node, -+ "__builtin_neon_poly8"); -+ (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, -+ "__builtin_neon_poly16"); - +- -enum neon_builtin_type_bits { - T_V8QI = 0x0001, - T_V4HI = 0x0002, @@ -7737,44 +7852,59 @@ - "__builtin_neon_poly8"); - (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, - "__builtin_neon_poly16"); -- -- intQI_pointer_node = build_pointer_type (neon_intQI_type_node); -- intHI_pointer_node = build_pointer_type (neon_intHI_type_node); -- intSI_pointer_node = build_pointer_type (neon_intSI_type_node); -- intDI_pointer_node = build_pointer_type (neon_intDI_type_node); -- float_pointer_node = build_pointer_type (neon_float_type_node); -+ intQI_pointer_node = build_pointer_type (neon_intQI_type_node); -+ intHI_pointer_node = build_pointer_type (neon_intHI_type_node); -+ intSI_pointer_node = build_pointer_type (neon_intSI_type_node); -+ intDI_pointer_node = build_pointer_type (neon_intDI_type_node); -+ float_pointer_node = build_pointer_type (neon_float_type_node); - - /* Next create constant-qualified versions of the above types. */ - const_intQI_node = build_qualified_type (neon_intQI_type_node, -@@ -18843,252 +19382,740 @@ - } - } ++ /* Define typedefs which exactly correspond to the modes we are basing vector ++ types on. If you change these names you'll need to change ++ the table used by arm_mangle_type too. 
*/ ++ (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node, ++ "__builtin_neon_qi"); ++ (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, ++ "__builtin_neon_hi"); ++ (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, ++ "__builtin_neon_si"); ++ (*lang_hooks.types.register_builtin_type) (neon_float_type_node, ++ "__builtin_neon_sf"); ++ (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node, ++ "__builtin_neon_di"); ++ (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node, ++ "__builtin_neon_poly8"); ++ (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, ++ "__builtin_neon_poly16"); -- for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++) -+ for (i = 0, fcode = ARM_BUILTIN_NEON_BASE; -+ i < ARRAY_SIZE (neon_builtin_data); -+ i++, fcode++) - { - neon_builtin_datum *d = &neon_builtin_data[i]; -- unsigned int j, codeidx = 0; + intQI_pointer_node = build_pointer_type (neon_intQI_type_node); + intHI_pointer_node = build_pointer_type (neon_intHI_type_node); +@@ -18880,264 +19481,752 @@ + qreg_types[3] = V4SF_type_node; + qreg_types[4] = V2DI_type_node; -- d->base_fcode = fcode; -- -- for (j = 0; j < T_MAX; j++) -- { -- const char* const modenames[] = { -- "v8qi", "v4hi", "v2si", "v2sf", "di", -- "v16qi", "v8hi", "v4si", "v4sf", "v2di" -- }; -- char namebuf[60]; -- tree ftype = NULL; -- enum insn_code icode; -- int is_load = 0, is_store = 0; +- for (i = 0; i < 5; i++) +- { +- int j; +- for (j = 0; j < 5; j++) +- { +- reinterp_ftype_dreg[i][j] +- = build_function_type_list (dreg_types[i], dreg_types[j], NULL); +- reinterp_ftype_qreg[i][j] +- = build_function_type_list (qreg_types[i], qreg_types[j], NULL); +- } +- } ++ for (i = 0; i < 5; i++) ++ { ++ int j; ++ for (j = 0; j < 5; j++) ++ { ++ reinterp_ftype_dreg[i][j] ++ = build_function_type_list (dreg_types[i], dreg_types[j], NULL); ++ reinterp_ftype_qreg[i][j] ++ = build_function_type_list (qreg_types[i], qreg_types[j], NULL); ++ } ++ } ++ ++ for (i = 
0, fcode = ARM_BUILTIN_NEON_BASE; ++ i < ARRAY_SIZE (neon_builtin_data); ++ i++, fcode++) ++ { ++ neon_builtin_datum *d = &neon_builtin_data[i]; ++ + const char* const modenames[] = { + "v8qi", "v4hi", "v2si", "v2sf", "di", + "v16qi", "v8hi", "v4si", "v4sf", "v2di", @@ -7833,69 +7963,21 @@ + { + int k; + tree return_type = void_type_node, args = void_list_node; - -- if ((d->bits & (1 << j)) == 0) -- continue; ++ + /* Build a function type directly from the insn_data for + this builtin. The build_function_type() function takes + care of removing duplicates for us. */ + for (k = insn_data[d->code].n_operands - 1; k >= 0; k--) + { + tree eltype; - -- icode = d->codes[codeidx++]; ++ + if (is_load && k == 1) + { + /* Neon load patterns always have the memory + operand in the operand 1 position. */ + gcc_assert (insn_data[d->code].operand[k].predicate + == neon_struct_operand); - -- switch (d->itype) -- { -- case NEON_LOAD1: -- case NEON_LOAD1LANE: -- case NEON_LOADSTRUCT: -- case NEON_LOADSTRUCTLANE: -- is_load = 1; -- /* Fall through. */ -- case NEON_STORE1: -- case NEON_STORE1LANE: -- case NEON_STORESTRUCT: -- case NEON_STORESTRUCTLANE: -- if (!is_load) -- is_store = 1; -- /* Fall through. 
*/ -- case NEON_UNOP: -- case NEON_BINOP: -- case NEON_LOGICBINOP: -- case NEON_SHIFTINSERT: -- case NEON_TERNOP: -- case NEON_GETLANE: -- case NEON_SETLANE: -- case NEON_CREATE: -- case NEON_DUP: -- case NEON_DUPLANE: -- case NEON_SHIFTIMM: -- case NEON_SHIFTACC: -- case NEON_COMBINE: -- case NEON_SPLIT: -- case NEON_CONVERT: -- case NEON_FIXCONV: -- case NEON_LANEMUL: -- case NEON_LANEMULL: -- case NEON_LANEMULH: -- case NEON_LANEMAC: -- case NEON_SCALARMUL: -- case NEON_SCALARMULL: -- case NEON_SCALARMULH: -- case NEON_SCALARMAC: -- case NEON_SELECT: -- case NEON_VTBL: -- case NEON_VTBX: -- { -- int k; -- tree return_type = void_type_node, args = void_list_node; ++ + switch (d->mode) + { + case T_V8QI: @@ -7922,30 +8004,19 @@ + case T_V2DI: + eltype = const_intDI_pointer_node; + break; - -- /* Build a function type directly from the insn_data for this -- builtin. The build_function_type() function takes care of -- removing duplicates for us. */ -- for (k = insn_data[icode].n_operands - 1; k >= 0; k--) ++ + default: gcc_unreachable (); + } + } + else if (is_store && k == 0) - { -- tree eltype; ++ { + /* Similarly, Neon store patterns use operand 0 as + the memory location to store to. */ + gcc_assert (insn_data[d->code].operand[k].predicate + == neon_struct_operand); - -- if (is_load && k == 1) ++ + switch (d->mode) - { -- /* Neon load patterns always have the memory operand -- (a SImode pointer) in the operand 1 position. We -- want a const pointer to the element type in that -- position. 
*/ -- gcc_assert (insn_data[icode].operand[k].mode == SImode); ++ { + case T_V8QI: + case T_V16QI: + eltype = intQI_pointer_node; @@ -7970,13 +8041,7 @@ + case T_V2DI: + eltype = intDI_pointer_node; + break; - -- switch (1 << j) -- { -- case T_V8QI: -- case T_V16QI: -- eltype = const_intQI_pointer_node; -- break; ++ + default: gcc_unreachable (); + } + } @@ -8010,29 +8075,17 @@ + default: gcc_unreachable (); + } + } - -- case T_V4HI: -- case T_V8HI: -- eltype = const_intHI_pointer_node; -- break; ++ + if (k == 0 && !is_store) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } - -- case T_V2SI: -- case T_V4SI: -- eltype = const_intSI_pointer_node; -- break; ++ + ftype = build_function_type (return_type, args); + } + break; - -- case T_V2SF: -- case T_V4SF: -- eltype = const_float_pointer_node; -- break; ++ + case NEON_RESULTPAIR: + { + switch (insn_data[d->code].operand[1].mode) @@ -8051,11 +8104,7 @@ + } + } + break; - -- case T_DI: -- case T_V2DI: -- eltype = const_intDI_pointer_node; -- break; ++ + case NEON_REINTERP: + { + /* We iterate over 5 doubleword types, then 5 quadword @@ -8077,39 +8126,15 @@ + } + } + break; - -- default: gcc_unreachable (); -- } -- } -- else if (is_store && k == 0) -- { -- /* Similarly, Neon store patterns use operand 0 as -- the memory location to store to (a SImode pointer). -- Use a pointer to the element type of the store in -- that position. 
*/ -- gcc_assert (insn_data[icode].operand[k].mode == SImode); ++ + default: + gcc_unreachable (); + } - -- switch (1 << j) -- { -- case T_V8QI: -- case T_V16QI: -- eltype = intQI_pointer_node; -- break; ++ + gcc_assert (ftype != NULL); - -- case T_V4HI: -- case T_V8HI: -- eltype = intHI_pointer_node; -- break; ++ + sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]); - -- case T_V2SI: -- case T_V4SI: -- eltype = intSI_pointer_node; -- break; ++ + decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, + NULL_TREE); + arm_builtin_decls[fcode] = decl; @@ -8423,11 +8448,7 @@ + = build_function_type (V4HI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + endlink)); - -- case T_V2SF: -- case T_V4SF: -- eltype = float_pointer_node; -- break; ++ + tree di_ftype_di_v4hi_v4hi + = build_function_type (long_long_unsigned_type_node, + tree_cons (NULL_TREE, @@ -8437,10 +8458,7 @@ + V4HI_type_node, + endlink)))); -- case T_DI: -- case T_V2DI: -- eltype = intDI_pointer_node; -- break; +- for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++) + tree di_ftype_v4hi_v4hi + = build_function_type (long_long_unsigned_type_node, + tree_cons (NULL_TREE, V4HI_type_node, @@ -8473,7 +8491,153 @@ + /* Add all builtins that are more or less simple operations on two + operands. 
*/ + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) -+ { + { +- neon_builtin_datum *d = &neon_builtin_data[i]; +- unsigned int j, codeidx = 0; +- +- d->base_fcode = fcode; +- +- for (j = 0; j < T_MAX; j++) +- { +- const char* const modenames[] = { +- "v8qi", "v4hi", "v2si", "v2sf", "di", +- "v16qi", "v8hi", "v4si", "v4sf", "v2di" +- }; +- char namebuf[60]; +- tree ftype = NULL; +- enum insn_code icode; +- int is_load = 0, is_store = 0; +- +- if ((d->bits & (1 << j)) == 0) +- continue; +- +- icode = d->codes[codeidx++]; +- +- switch (d->itype) +- { +- case NEON_LOAD1: +- case NEON_LOAD1LANE: +- case NEON_LOADSTRUCT: +- case NEON_LOADSTRUCTLANE: +- is_load = 1; +- /* Fall through. */ +- case NEON_STORE1: +- case NEON_STORE1LANE: +- case NEON_STORESTRUCT: +- case NEON_STORESTRUCTLANE: +- if (!is_load) +- is_store = 1; +- /* Fall through. */ +- case NEON_UNOP: +- case NEON_BINOP: +- case NEON_LOGICBINOP: +- case NEON_SHIFTINSERT: +- case NEON_TERNOP: +- case NEON_GETLANE: +- case NEON_SETLANE: +- case NEON_CREATE: +- case NEON_DUP: +- case NEON_DUPLANE: +- case NEON_SHIFTIMM: +- case NEON_SHIFTACC: +- case NEON_COMBINE: +- case NEON_SPLIT: +- case NEON_CONVERT: +- case NEON_FIXCONV: +- case NEON_LANEMUL: +- case NEON_LANEMULL: +- case NEON_LANEMULH: +- case NEON_LANEMAC: +- case NEON_SCALARMUL: +- case NEON_SCALARMULL: +- case NEON_SCALARMULH: +- case NEON_SCALARMAC: +- case NEON_SELECT: +- case NEON_VTBL: +- case NEON_VTBX: +- { +- int k; +- tree return_type = void_type_node, args = void_list_node; +- +- /* Build a function type directly from the insn_data for this +- builtin. The build_function_type() function takes care of +- removing duplicates for us. */ +- for (k = insn_data[icode].n_operands - 1; k >= 0; k--) +- { +- tree eltype; +- +- if (is_load && k == 1) +- { +- /* Neon load patterns always have the memory operand +- (a SImode pointer) in the operand 1 position. We +- want a const pointer to the element type in that +- position. 
*/ +- gcc_assert (insn_data[icode].operand[k].mode == SImode); +- +- switch (1 << j) +- { +- case T_V8QI: +- case T_V16QI: +- eltype = const_intQI_pointer_node; +- break; +- +- case T_V4HI: +- case T_V8HI: +- eltype = const_intHI_pointer_node; +- break; +- +- case T_V2SI: +- case T_V4SI: +- eltype = const_intSI_pointer_node; +- break; +- +- case T_V2SF: +- case T_V4SF: +- eltype = const_float_pointer_node; +- break; +- +- case T_DI: +- case T_V2DI: +- eltype = const_intDI_pointer_node; +- break; +- +- default: gcc_unreachable (); +- } +- } +- else if (is_store && k == 0) +- { +- /* Similarly, Neon store patterns use operand 0 as +- the memory location to store to (a SImode pointer). +- Use a pointer to the element type of the store in +- that position. */ +- gcc_assert (insn_data[icode].operand[k].mode == SImode); +- +- switch (1 << j) +- { +- case T_V8QI: +- case T_V16QI: +- eltype = intQI_pointer_node; +- break; +- +- case T_V4HI: +- case T_V8HI: +- eltype = intHI_pointer_node; +- break; +- +- case T_V2SI: +- case T_V4SI: +- eltype = intSI_pointer_node; +- break; +- +- case T_V2SF: +- case T_V4SF: +- eltype = float_pointer_node; +- break; +- +- case T_DI: +- case T_V2DI: +- eltype = intDI_pointer_node; +- break; + /* Use one of the operands; the target can have a different mode for + mask-generating compares. */ + enum machine_mode mode; @@ -8708,7 +8872,7 @@ } static void -@@ -19115,6 +20142,17 @@ +@@ -19164,6 +20253,17 @@ arm_init_fp16_builtins (); } @@ -8726,7 +8890,7 @@ /* Implement TARGET_INVALID_PARAMETER_TYPE. */ static const char * -@@ -19266,55 +20304,68 @@ +@@ -19315,55 +20415,68 @@ return target; } @@ -8819,9 +8983,9 @@ + /* Create a type that describes the full access. */ + upper_bound = build_int_cst (size_type_node, nelems - 1); + array_type = build_array_type (elem_type, build_index_type (upper_bound)); -+ + /* Dereference EXP using that type. 
*/ -+ exp = convert (build_pointer_type (array_type), exp); ++ exp = fold_convert (build_pointer_type (array_type), exp); ++ + return fold_build2 (MEM_REF, array_type, exp, + build_int_cst (TREE_TYPE (exp), 0)); +} @@ -8833,7 +8997,7 @@ tree exp, ...) { va_list ap; -@@ -19323,7 +20374,9 @@ +@@ -19372,7 +20485,9 @@ rtx op[NEON_MAX_BUILTIN_ARGS]; enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; @@ -8843,7 +9007,7 @@ if (have_retval && (!target -@@ -19341,26 +20394,46 @@ +@@ -19390,26 +20505,46 @@ break; else { @@ -8893,7 +9057,7 @@ case NEON_ARG_STOP: gcc_unreachable (); } -@@ -19438,15 +20511,17 @@ +@@ -19487,15 +20622,17 @@ static rtx arm_expand_neon_builtin (int fcode, tree exp, rtx target) { @@ -8914,7 +9078,7 @@ NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); case NEON_BINOP: -@@ -19456,90 +20531,90 @@ +@@ -19505,90 +20642,90 @@ case NEON_SCALARMULH: case NEON_SHIFTINSERT: case NEON_LOGICBINOP: @@ -9025,67 +9189,7 @@ NEON_ARG_STOP); } -@@ -19571,39 +20646,34 @@ - emit_move_insn (mem, tmp2); - } - --/* Set up operands for a register copy from src to dest, taking care not to -- clobber registers in the process. -- FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't -- be called with a large N, so that should be OK. */ -+/* Set up OPERANDS for a register copy from SRC to DEST, taking care -+ not to early-clobber SRC registers in the process. - -+ We assume that the operands described by SRC and DEST represent a -+ decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the -+ number of components into which the copy has been decomposed. 
*/ - void - neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count) - { -- unsigned int copied = 0, opctr = 0; -- unsigned int done = (1 << count) - 1; -- unsigned int i, j; -+ unsigned int i; - -- while (copied != done) -+ if (!reg_overlap_mentioned_p (operands[0], operands[1]) -+ || REGNO (operands[0]) < REGNO (operands[1])) - { - for (i = 0; i < count; i++) -- { -- int good = 1; -- -- for (j = 0; good && j < count; j++) -- if (i != j && (copied & (1 << j)) == 0 -- && reg_overlap_mentioned_p (src[j], dest[i])) -- good = 0; -- -- if (good) -- { -- operands[opctr++] = dest[i]; -- operands[opctr++] = src[i]; -- copied |= 1 << i; -- } -- } -+ { -+ operands[2 * i] = dest[i]; -+ operands[2 * i + 1] = src[i]; -+ } -+ } -+ else -+ { -+ for (i = 0; i < count; i++) -+ { -+ operands[2 * i] = dest[count - i - 1]; -+ operands[2 * i + 1] = src[count - i - 1]; -+ } - } -- -- gcc_assert (opctr == count * 2); - } - - /* Expand an expression EXP that calls a built-in function, -@@ -21455,6 +22525,8 @@ +@@ -21501,6 +22638,8 @@ const char *fpu_name; if (arm_selected_arch) asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); @@ -9094,7 +9198,7 @@ else asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name); -@@ -21518,6 +22590,10 @@ +@@ -21564,6 +22703,10 @@ val = 6; asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); @@ -9105,7 +9209,7 @@ /* Tag_ABI_FP_16bit_format. */ if (arm_fp16_format) asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", -@@ -22261,7 +23337,21 @@ +@@ -22307,7 +23450,21 @@ return false; } @@ -9128,7 +9232,7 @@ registers when autovectorizing for Neon, at least until multiple vector widths are supported properly by the middle-end. */ -@@ -22272,15 +23362,15 @@ +@@ -22318,15 +23475,15 @@ switch (mode) { case SFmode: @@ -9149,7 +9253,7 @@ return V2DImode; break; -@@ -22305,14 +23395,16 @@ +@@ -22351,14 +23508,16 @@ /* Implement TARGET_CLASS_LIKELY_SPILLED_P. 
@@ -9171,7 +9275,7 @@ || rclass == CC_REG) return true; -@@ -22964,8 +24056,13 @@ +@@ -23010,8 +24169,13 @@ { switch (arm_tune) { @@ -9185,7 +9289,7 @@ case cortexa5: case cortexa8: case cortexa9: -@@ -23218,12 +24315,26 @@ +@@ -23264,12 +24428,26 @@ rtx target, rtx memory) { @@ -9216,7 +9320,7 @@ } /* Emit a strex{b,h,d, } instruction appropriate for the specified -@@ -23236,14 +24347,41 @@ +@@ -23282,14 +24460,41 @@ rtx value, rtx memory) { @@ -9263,7 +9367,7 @@ } /* Helper to emit a two operand instruction. */ -@@ -23285,7 +24423,7 @@ +@@ -23331,7 +24536,7 @@ required_value: @@ -9272,7 +9376,7 @@ the modify to continue, if NULL no comparsion is performed. */ static void arm_output_sync_loop (emit_f emit, -@@ -23299,7 +24437,13 @@ +@@ -23345,7 +24550,13 @@ enum attr_sync_op sync_op, int early_barrier_required) { @@ -9287,7 +9391,7 @@ gcc_assert (t1 != t2); -@@ -23310,82 +24454,142 @@ +@@ -23356,82 +24567,142 @@ arm_output_ldrex (emit, mode, old_value, memory); @@ -9451,20 +9555,7 @@ break; default: -@@ -23393,8 +24597,11 @@ - } - } - -- arm_process_output_memory_barrier (emit, NULL); -+ /* Note: label is before barrier so that in cmp failure case we still get -+ a barrier to stop subsequent loads floating upwards past the ldrex -+ PR target/48126. 
*/ - arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); -+ arm_process_output_memory_barrier (emit, NULL); - } - - static rtx -@@ -23488,7 +24695,7 @@ +@@ -23537,7 +24808,7 @@ target = gen_reg_rtx (mode); memory = arm_legitimize_sync_memory (memory); @@ -9473,7 +9564,7 @@ { rtx load_temp = gen_reg_rtx (SImode); -@@ -23507,6 +24714,12 @@ +@@ -23556,6 +24827,12 @@ } } @@ -9486,7 +9577,7 @@ static bool arm_vector_alignment_reachable (const_tree type, bool is_packed) { -@@ -23660,4 +24873,53 @@ +@@ -23709,4 +24986,53 @@ return NO_REGS; } @@ -9540,6 +9631,54 @@ + #include "gt-arm.h" + +--- a/src/gcc/config/arm/arm-cores.def ++++ b/src/gcc/config/arm/arm-cores.def +@@ -70,10 +70,10 @@ + /* V4 Architecture Processors */ + ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) + ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) +-ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +-ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) ++ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) ++ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) + ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul) + ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul) + +@@ -122,15 +122,19 @@ + ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) + ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) + ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) +-ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) +-ARM_CORE("arm1156t2f-s", 
arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e) +-ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) +-ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) ++ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, v6t2) ++ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) ++ARM_CORE("generic-armv7-a", genericv7a, 7A, FL_LDSCHED, cortex) ++ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, cortex_a5) ++ARM_CORE("cortex-a7", cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +-ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e) +-ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) +-ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) +-ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e) ++ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) ++ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) ++ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) ++ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, cortex) ++ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, cortex) ++ --- a/src/gcc/config/arm/arm.h +++ b/src/gcc/config/arm/arm.h @@ -47,6 +47,8 @@ @@ -9578,17 +9717,7 @@ /* We could use unified syntax for arm mode, but for now we just use it for Thumb-2. */ -@@ -294,7 +300,8 @@ - #define TARGET_HAVE_DMB (arm_arch7) - - /* Nonzero if this chip implements a memory barrier via CP15. */ --#define TARGET_HAVE_DMB_MCR (arm_arch6k && ! TARGET_HAVE_DMB) -+#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \ -+ && ! 
TARGET_THUMB1) - - /* Nonzero if this chip implements a memory barrier instruction. */ - #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) -@@ -302,8 +309,16 @@ +@@ -303,8 +309,16 @@ /* Nonzero if this chip supports ldrex and strex */ #define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) @@ -9607,7 +9736,7 @@ /* True iff the full BPABI is being used. If TARGET_BPABI is true, then TARGET_AAPCS_BASED must be true -- but the converse does not -@@ -489,8 +504,11 @@ +@@ -490,8 +504,11 @@ /* Nonzero if chip supports Thumb 2. */ extern int arm_arch_thumb2; @@ -9621,7 +9750,7 @@ #ifndef TARGET_DEFAULT #define TARGET_DEFAULT (MASK_APCS_FRAME) -@@ -1171,12 +1189,12 @@ +@@ -1172,12 +1189,12 @@ } /* FPA registers can't do subreg as all values are reformatted to internal @@ -9640,7 +9769,7 @@ : 0) /* The class value for index registers, and the one for base regs. */ -@@ -1187,7 +1205,7 @@ +@@ -1188,7 +1205,7 @@ when addressing quantities in QI or HI mode; if we don't know the mode, then we must be conservative. */ #define MODE_BASE_REG_CLASS(MODE) \ @@ -9649,7 +9778,7 @@ (((MODE) == SImode) ? BASE_REGS : LO_REGS)) /* For Thumb we can not support SP+reg addressing, so we return LO_REGS -@@ -1777,27 +1795,6 @@ +@@ -1778,27 +1795,6 @@ #define TARGET_DEFAULT_WORD_RELOCATIONS 0 #endif @@ -9677,7 +9806,7 @@ #ifndef SUBTARGET_NAME_ENCODING_LENGTHS #define SUBTARGET_NAME_ENCODING_LENGTHS #endif -@@ -1972,7 +1969,7 @@ +@@ -1973,7 +1969,7 @@ : min >= -4096 && max < 4096 \ ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ : SImode) \ @@ -9686,7 +9815,7 @@ : (max >= 0x200) ? HImode \ : QImode)) -@@ -2041,7 +2038,8 @@ +@@ -2042,7 +2038,8 @@ /* Try to generate sequences that don't involve branches, we can then use conditional instructions */ #define BRANCH_COST(speed_p, predictable_p) \ @@ -9696,7 +9825,7 @@ /* Position Independent Code. 
*/ /* We decide which register to use based on the compilation options and -@@ -2279,178 +2277,6 @@ +@@ -2280,178 +2277,6 @@ : arm_gen_return_addr_mask ()) @@ -9875,7 +10004,7 @@ /* Do not emit .note.GNU-stack by default. */ #ifndef NEED_INDICATE_EXEC_STACK #define NEED_INDICATE_EXEC_STACK 0 -@@ -2460,4 +2286,25 @@ +@@ -2461,4 +2286,25 @@ instruction. */ #define MAX_LDM_STM_OPS 4 @@ -9919,18 +10048,35 @@ ;; UNSPEC Usage: ;; Note: sin and cos are no-longer used. -@@ -104,6 +113,10 @@ - (UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from +@@ -105,6 +114,10 @@ ; another symbolic address. (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier. -+ (UNSPEC_UNALIGNED_LOAD 29) ; Used to represent ldr/ldrh instructions that access + (UNSPEC_PIC_UNIFIED 29) ; Create a common pic addressing form. ++ (UNSPEC_UNALIGNED_LOAD 30) ; Used to represent ldr/ldrh instructions that access + ; unaligned locations, on architectures which support + ; that. -+ (UNSPEC_UNALIGNED_STORE 30) ; Same for str/strh. ++ (UNSPEC_UNALIGNED_STORE 31) ; Same for str/strh. + ] + ) + +@@ -144,6 +157,7 @@ + (VUNSPEC_SYNC_OP 23) ; Represent a sync_ + (VUNSPEC_SYNC_NEW_OP 24) ; Represent a sync_new_ + (VUNSPEC_SYNC_OLD_OP 25) ; Represent a sync_old_ ++ (VUNSPEC_SYNC_RELEASE 26) ; Represent a sync_lock_release. ] ) + +@@ -272,7 +286,7 @@ + ;; scheduling information. 
-@@ -332,6 +345,13 @@ + (define_attr "insn" +- "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other" ++ "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,sat,other" + (const_string "other")) + + ; TYPE attribute is used to detect floating point instructions which, if +@@ -333,6 +347,13 @@ (const_string "mult") (const_string "alu"))) @@ -9944,7 +10090,7 @@ ; Load scheduling, set from the arm_ld_sched variable ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) -@@ -490,7 +510,7 @@ +@@ -491,7 +512,7 @@ (define_attr "tune_cortexr4" "yes,no" (const (if_then_else @@ -9953,7 +10099,7 @@ (const_string "yes") (const_string "no")))) -@@ -498,7 +518,7 @@ +@@ -499,7 +520,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else @@ -9962,7 +10108,7 @@ (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) -@@ -524,6 +544,7 @@ +@@ -525,6 +546,7 @@ (include "cortex-a5.md") (include "cortex-a8.md") (include "cortex-a9.md") @@ -9970,7 +10116,7 @@ (include "cortex-r4.md") (include "cortex-r4f.md") (include "cortex-m4.md") -@@ -701,21 +722,24 @@ +@@ -702,21 +724,24 @@ ;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will ;; put the duplicated register first, and not try the commutative version. 
(define_insn_and_split "*arm_addsi3" @@ -10000,7 +10146,7 @@ && (reload_completed || !arm_eliminable_register (operands[1]))" [(clobber (const_int 0))] " -@@ -724,8 +748,9 @@ +@@ -725,8 +750,9 @@ operands[1], 0); DONE; " @@ -10012,7 +10158,7 @@ ) (define_insn_and_split "*thumb1_addsi3" -@@ -791,7 +816,7 @@ +@@ -792,7 +818,7 @@ "" ) @@ -10021,7 +10167,7 @@ [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (plus:SI (match_operand:SI 1 "s_register_operand" "r, r") -@@ -1806,7 +1831,37 @@ +@@ -1807,7 +1833,37 @@ (set_attr "predicable" "yes")] ) @@ -10060,7 +10206,7 @@ [(set (match_operand:DI 0 "s_register_operand" "=r") (plus:DI (mult:DI (sign_extend:DI -@@ -1819,6 +1874,39 @@ +@@ -1820,6 +1876,39 @@ [(set_attr "insn" "smlalxy") (set_attr "predicable" "yes")]) @@ -10100,7 +10246,7 @@ (define_expand "mulsf3" [(set (match_operand:SF 0 "s_register_operand" "") (mult:SF (match_operand:SF 1 "s_register_operand" "") -@@ -2384,10 +2472,10 @@ +@@ -2385,10 +2474,10 @@ ;;; this insv pattern, so this pattern needs to be reevalutated. (define_expand "insv" @@ -10115,7 +10261,7 @@ "TARGET_ARM || arm_arch_thumb2" " { -@@ -2398,35 +2486,70 @@ +@@ -2399,35 +2488,70 @@ if (arm_arch_thumb2) { @@ -10204,7 +10350,68 @@ target = copy_rtx (operands[0]); /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical subreg as the final target. 
*/ -@@ -3618,12 +3741,10 @@ +@@ -3300,6 +3424,60 @@ + (const_int 12)))] + ) + ++(define_code_iterator SAT [smin smax]) ++(define_code_iterator SATrev [smin smax]) ++(define_code_attr SATlo [(smin "1") (smax "2")]) ++(define_code_attr SAThi [(smin "2") (smax "1")]) ++ ++(define_insn "*satsi_" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (SAT:SI (SATrev:SI (match_operand:SI 3 "s_register_operand" "r") ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "TARGET_32BIT && arm_arch6 && != ++ && arm_sat_operator_match (operands[], operands[], NULL, NULL)" ++{ ++ int mask; ++ bool signed_sat; ++ if (!arm_sat_operator_match (operands[], operands[], ++ &mask, &signed_sat)) ++ gcc_unreachable (); ++ ++ operands[1] = GEN_INT (mask); ++ if (signed_sat) ++ return "ssat%?\t%0, %1, %3"; ++ else ++ return "usat%?\t%0, %1, %3"; ++} ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "sat")]) ++ ++(define_insn "*satsi__shift" ++ [(set (match_operand:SI 0 "s_register_operand" "=r") ++ (SAT:SI (SATrev:SI (match_operator:SI 3 "sat_shift_operator" ++ [(match_operand:SI 4 "s_register_operand" "r") ++ (match_operand:SI 5 "const_int_operand" "i")]) ++ (match_operand:SI 1 "const_int_operand" "i")) ++ (match_operand:SI 2 "const_int_operand" "i")))] ++ "TARGET_32BIT && arm_arch6 && != ++ && arm_sat_operator_match (operands[], operands[], NULL, NULL)" ++{ ++ int mask; ++ bool signed_sat; ++ if (!arm_sat_operator_match (operands[], operands[], ++ &mask, &signed_sat)) ++ gcc_unreachable (); ++ ++ operands[1] = GEN_INT (mask); ++ if (signed_sat) ++ return "ssat%?\t%0, %1, %4%S3"; ++ else ++ return "usat%?\t%0, %1, %4%S3"; ++} ++ [(set_attr "predicable" "yes") ++ (set_attr "insn" "sat") ++ (set_attr "shift" "3") ++ (set_attr "type" "alu_shift")]) + + ;; Shift and rotation insns + +@@ -3619,12 +3797,10 @@ ;; to reduce register pressure later on. 
(define_expand "extzv" @@ -10221,7 +10428,7 @@ "TARGET_THUMB1 || arm_arch_thumb2" " { -@@ -3632,10 +3753,57 @@ +@@ -3633,10 +3809,57 @@ if (arm_arch_thumb2) { @@ -10282,7 +10489,7 @@ operands[3] = GEN_INT (rshift); -@@ -3645,12 +3813,154 @@ +@@ -3646,12 +3869,154 @@ DONE; } @@ -10440,7 +10647,7 @@ [(set (match_operand:SI 0 "s_register_operand" "=r") (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "const_int_operand" "M") -@@ -3672,6 +3982,28 @@ +@@ -3673,6 +4038,28 @@ (set_attr "predicable" "yes")] ) @@ -10469,7 +10676,7 @@ ;; Unary arithmetic insns -@@ -4044,8 +4376,8 @@ +@@ -4045,8 +4432,8 @@ (define_insn "zero_extenddi2" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -10480,7 +10687,7 @@ "TARGET_32BIT " "#" [(set_attr "length" "8") -@@ -5937,8 +6269,8 @@ +@@ -5963,8 +6350,8 @@ (define_insn "*arm_movqi_insn" @@ -10491,7 +10698,7 @@ "TARGET_32BIT && ( register_operand (operands[0], QImode) || register_operand (operands[1], QImode))" -@@ -5946,10 +6278,14 @@ +@@ -5972,10 +6359,14 @@ mov%?\\t%0, %1 mvn%?\\t%0, #%B1 ldr%(b%)\\t%0, %1 @@ -10509,7 +10716,7 @@ ) (define_insn "*thumb1_movqi_insn" -@@ -6179,7 +6515,7 @@ +@@ -6205,7 +6596,7 @@ [(match_operand:DF 0 "arm_reload_memory_operand" "=o") (match_operand:DF 1 "s_register_operand" "r") (match_operand:SI 2 "s_register_operand" "=&r")] @@ -10518,7 +10725,7 @@ " { enum rtx_code code = GET_CODE (XEXP (operands[0], 0)); -@@ -6442,7 +6778,7 @@ +@@ -6468,7 +6859,7 @@ (define_expand "cbranchsi4" [(set (pc) (if_then_else @@ -10527,7 +10734,7 @@ [(match_operand:SI 1 "s_register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")]) (label_ref (match_operand 3 "" "")) -@@ -6493,7 +6829,7 @@ +@@ -6519,7 +6910,7 @@ (define_expand "cbranchsf4" [(set (pc) (if_then_else @@ -10536,7 +10743,7 @@ [(match_operand:SF 1 "s_register_operand" "") (match_operand:SF 2 "arm_float_compare_operand" "")]) (label_ref (match_operand 3 "" "")) -@@ -6505,7 +6841,7 @@ +@@ -6531,7 +6922,7 @@ 
(define_expand "cbranchdf4" [(set (pc) (if_then_else @@ -10545,7 +10752,7 @@ [(match_operand:DF 1 "s_register_operand" "") (match_operand:DF 2 "arm_float_compare_operand" "")]) (label_ref (match_operand 3 "" "")) -@@ -6517,7 +6853,7 @@ +@@ -6543,7 +6934,7 @@ (define_expand "cbranchdi4" [(set (pc) (if_then_else @@ -10554,7 +10761,7 @@ [(match_operand:DI 1 "cmpdi_operand" "") (match_operand:DI 2 "cmpdi_operand" "")]) (label_ref (match_operand 3 "" "")) -@@ -7106,13 +7442,17 @@ +@@ -7132,13 +7523,17 @@ (define_insn "*arm_cmpsi_insn" [(set (reg:CC CC_REGNUM) @@ -10575,7 +10782,18 @@ ) (define_insn "*cmpsi_shiftsi" -@@ -7283,7 +7623,14 @@ +@@ -7201,8 +7596,8 @@ + [(set (reg:CC_CZ CC_REGNUM) + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r") + (match_operand:DI 1 "arm_di_operand" "rDi")))] +- "TARGET_ARM" +- "cmp%?\\t%R0, %R1\;cmpeq\\t%Q0, %Q1" ++ "TARGET_32BIT" ++ "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" + [(set_attr "conds" "set") + (set_attr "length" "8")] + ) +@@ -7309,7 +7704,14 @@ return \"b%d1\\t%l0\"; " [(set_attr "conds" "use") @@ -10591,7 +10809,7 @@ ) (define_insn "*arm_cond_branch_reversed" -@@ -7302,7 +7649,14 @@ +@@ -7328,7 +7730,14 @@ return \"b%D1\\t%l0\"; " [(set_attr "conds" "use") @@ -10607,7 +10825,7 @@ ) -@@ -7354,7 +7708,7 @@ +@@ -7380,7 +7789,7 @@ (define_expand "cstoresi4" [(set (match_operand:SI 0 "s_register_operand" "") @@ -10616,7 +10834,7 @@ [(match_operand:SI 2 "s_register_operand" "") (match_operand:SI 3 "reg_or_int_operand" "")]))] "TARGET_32BIT || TARGET_THUMB1" -@@ -7490,7 +7844,7 @@ +@@ -7516,7 +7925,7 @@ (define_expand "cstoresf4" [(set (match_operand:SI 0 "s_register_operand" "") @@ -10625,7 +10843,7 @@ [(match_operand:SF 2 "s_register_operand" "") (match_operand:SF 3 "arm_float_compare_operand" "")]))] "TARGET_32BIT && TARGET_HARD_FLOAT" -@@ -7500,7 +7854,7 @@ +@@ -7526,7 +7935,7 @@ (define_expand "cstoredf4" [(set (match_operand:SI 0 "s_register_operand" "") @@ -10634,7 +10852,7 @@ [(match_operand:DF 2 
"s_register_operand" "") (match_operand:DF 3 "arm_float_compare_operand" "")]))] "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" -@@ -7510,7 +7864,7 @@ +@@ -7536,7 +7945,7 @@ (define_expand "cstoredi4" [(set (match_operand:SI 0 "s_register_operand" "") @@ -10643,7 +10861,7 @@ [(match_operand:DI 2 "cmpdi_operand" "") (match_operand:DI 3 "cmpdi_operand" "")]))] "TARGET_32BIT" -@@ -7630,7 +7984,7 @@ +@@ -7656,7 +8065,7 @@ (define_expand "movsicc" [(set (match_operand:SI 0 "s_register_operand" "") @@ -10652,7 +10870,7 @@ (match_operand:SI 2 "arm_not_operand" "") (match_operand:SI 3 "arm_not_operand" "")))] "TARGET_32BIT" -@@ -7650,7 +8004,7 @@ +@@ -7676,7 +8085,7 @@ (define_expand "movsfcc" [(set (match_operand:SF 0 "s_register_operand" "") @@ -10661,7 +10879,7 @@ (match_operand:SF 2 "s_register_operand" "") (match_operand:SF 3 "nonmemory_operand" "")))] "TARGET_32BIT && TARGET_HARD_FLOAT" -@@ -7676,7 +8030,7 @@ +@@ -7702,7 +8111,7 @@ (define_expand "movdfcc" [(set (match_operand:DF 0 "s_register_operand" "") @@ -10670,7 +10888,7 @@ (match_operand:DF 2 "s_register_operand" "") (match_operand:DF 3 "arm_float_add_operand" "")))] "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" -@@ -7754,7 +8108,14 @@ +@@ -7780,7 +8189,14 @@ return \"b%?\\t%l0\"; } " @@ -10686,7 +10904,7 @@ ) (define_insn "*thumb_jump" -@@ -8839,40 +9200,85 @@ +@@ -8865,40 +9281,85 @@ (set_attr "length" "8,12")] ) @@ -10717,15 +10935,7 @@ { - static const char * const opcodes[4][2] = + static const char * const cmp1[NUM_OF_COND_CMP][2] = - { -- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", -- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, -- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", -- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, -- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", -- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, -- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", -- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} ++ { + {\"cmp%d5\\t%0, %1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmn%d5\\t%0, #%n1\", @@ -10734,7 +10944,7 @@ + 
\"cmn%d4\\t%2, #%n3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmn%d4\\t%2, #%n3\"} - }; ++ }; + static const char * const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%2, %3\", @@ -10747,10 +10957,18 @@ + \"cmn\\t%0, #%n1\"} + }; + static const char * const ite[2] = -+ { + { +- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, +- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, +- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, +- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} + \"it\\t%d5\", + \"it\\t%d4\" -+ }; + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; @@ -10789,7 +11007,7 @@ ) (define_insn "*cmp_ite1" -@@ -8880,35 +9286,81 @@ +@@ -8906,35 +9367,81 @@ (compare (if_then_else:SI (match_operator 4 "arm_comparison_operator" @@ -10887,7 +11105,7 @@ ) (define_insn "*cmp_and" -@@ -8916,34 +9368,80 @@ +@@ -8942,34 +9449,80 @@ (compare (and:SI (match_operator 4 "arm_comparison_operator" @@ -10911,7 +11129,15 @@ { - static const char *const opcodes[4][2] = + static const char *const cmp1[NUM_OF_COND_CMP][2] = -+ { + { +- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, +- {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, +- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", +- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, +- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", +- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} + {\"cmp%d5\\t%0, %1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmn%d5\\t%0, #%n1\", @@ -10920,7 +11146,7 @@ + \"cmn%d4\\t%2, #%n3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmn%d4\\t%2, #%n3\"} -+ }; + }; + static const char *const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%2, %3\", @@ -10933,18 +11159,10 @@ + \"cmn\\t%0, #%n1\"} + }; + static const char *const ite[2] = - { -- {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", -- \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, -- {\"cmp\\t%2, 
%3\;cmn%d5\\t%0, #%n1\", -- \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, -- {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", -- \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, -- {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", -- \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} ++ { + \"it\\t%d5\", + \"it\\t%d4\" - }; ++ }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; @@ -10984,7 +11202,7 @@ ) (define_insn "*cmp_ior" -@@ -8951,34 +9449,80 @@ +@@ -8977,34 +9530,80 @@ (compare (ior:SI (match_operator 4 "arm_comparison_operator" @@ -11087,7 +11305,7 @@ ) (define_insn_and_split "*ior_scc_scc" -@@ -8990,11 +9534,11 @@ +@@ -9016,11 +9615,11 @@ [(match_operand:SI 4 "s_register_operand" "r") (match_operand:SI 5 "arm_add_operand" "rIL")]))) (clobber (reg:CC CC_REGNUM))] @@ -11101,7 +11319,7 @@ [(set (match_dup 7) (compare (ior:SI -@@ -9023,9 +9567,9 @@ +@@ -9049,9 +9648,9 @@ (set (match_operand:SI 7 "s_register_operand" "=r") (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] @@ -11113,7 +11331,7 @@ [(set (match_dup 0) (compare (ior:SI -@@ -9046,11 +9590,11 @@ +@@ -9072,11 +9671,11 @@ [(match_operand:SI 4 "s_register_operand" "r") (match_operand:SI 5 "arm_add_operand" "rIL")]))) (clobber (reg:CC CC_REGNUM))] @@ -11127,7 +11345,7 @@ && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) != CCmode)" [(set (match_dup 7) -@@ -9081,9 +9625,9 @@ +@@ -9107,9 +9706,9 @@ (set (match_operand:SI 7 "s_register_operand" "=r") (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] @@ -11139,7 +11357,7 @@ [(set (match_dup 0) (compare (and:SI -@@ -9108,11 +9652,11 @@ +@@ -9134,11 +9733,11 @@ [(match_operand:SI 4 "s_register_operand" "r,r,r") (match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")]))) (clobber (reg:CC CC_REGNUM))] @@ -11153,7 +11371,7 @@ [(parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) (clobber 
(reg:CC CC_REGNUM))]) -@@ -10222,6 +10766,8 @@ +@@ -10248,6 +10847,8 @@ ;; Push multiple registers to the stack. Registers are in parallel (use ...) ;; expressions. For simplicity, the first register is also in the unspec ;; part. @@ -11162,7 +11380,7 @@ (define_insn "*push_multi" [(match_parallel 2 "multi_register_push" [(set (match_operand:BLK 0 "memory_operand" "=m") -@@ -10261,7 +10807,9 @@ +@@ -10287,7 +10888,9 @@ return \"\"; }" @@ -11220,23 +11438,117 @@ +munaligned-access +Target Report Var(unaligned_access) Init(2) +Enable unaligned word and halfword accesses to packed data. ---- a/src/gcc/config/arm/bpabi.h -+++ b/src/gcc/config/arm/bpabi.h -@@ -56,7 +56,8 @@ - "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" - - #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ -- "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}" -+ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\ -+ ":%{!r:--be8}}}" +--- a/src/gcc/config/arm/arm-protos.h ++++ b/src/gcc/config/arm/arm-protos.h +@@ -46,6 +46,7 @@ + extern bool arm_small_register_classes_for_mode_p (enum machine_mode); + extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); + extern int const_ok_for_arm (HOST_WIDE_INT); ++extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); + extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int); + extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *); +@@ -58,14 +59,19 @@ + int); + extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); ++extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int); + extern int arm_const_double_rtx (rtx); + extern int neg_const_double_rtx_ok_for_fpa (rtx); + extern int vfp3_const_double_rtx (rtx); + extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *); + extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, + int *); ++extern int 
neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *, ++ int *, bool); + extern char *neon_output_logic_immediate (const char *, rtx *, + enum machine_mode, int, int); ++extern char *neon_output_shift_immediate (const char *, char, rtx *, ++ enum machine_mode, int, bool); + extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, + rtx (*) (rtx, rtx, rtx)); + extern rtx neon_make_constant (rtx); +@@ -81,7 +87,6 @@ + extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, + bool); + extern bool arm_tls_referenced_p (rtx); +-extern bool arm_cannot_force_const_mem (rtx); + + extern int cirrus_memory_offset (rtx); + extern int arm_coproc_mem_operand (rtx, bool); +@@ -99,6 +104,7 @@ + extern int symbol_mentioned_p (rtx); + extern int label_mentioned_p (rtx); + extern RTX_CODE minmax_code (rtx); ++extern bool arm_sat_operator_match (rtx, rtx, int *, bool *); + extern int adjacent_mem_locations (rtx, rtx); + extern bool gen_ldm_seq (rtx *, int, bool); + extern bool gen_stm_seq (rtx *, int); +@@ -152,6 +158,7 @@ + extern const char *arm_output_memory_barrier (rtx *); + extern const char *arm_output_sync_insn (rtx, rtx *); + extern unsigned int arm_sync_loop_insns (rtx , rtx *); ++extern int arm_attr_length_push_multi(rtx, rtx); + + #if defined TREE_CODE + extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); +@@ -175,6 +182,7 @@ + #endif + extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); + #ifdef RTX_CODE ++extern enum arm_cond_code maybe_get_arm_condition_code (rtx); + extern void thumb1_final_prescan_insn (rtx); + extern void thumb2_final_prescan_insn (rtx); + extern const char *thumb_load_double_from_address (rtx *); +@@ -220,12 +228,18 @@ + bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); + int constant_limit; ++ /* Maximum number of instructions to conditionalise in ++ arm_final_prescan_insn. 
*/ ++ int max_insns_skipped; + int num_prefetch_slots; + int l1_cache_size; + int l1_cache_line_size; ++ bool prefer_constant_pool; ++ int (*branch_cost) (bool, bool); + }; + + extern const struct tune_params *current_tune; ++extern int vfp3_const_double_for_fract_bits (rtx); + #endif /* RTX_CODE */ + + #endif /* ! GCC_ARM_PROTOS_H */ +--- a/src/gcc/config/arm/arm-tune.md ++++ b/src/gcc/config/arm/arm-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from arm-cores.def + (define_attr "tune" +- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0" ++ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) +--- a/src/gcc/config/arm/bpabi.h ++++ 
b/src/gcc/config/arm/bpabi.h +@@ -56,7 +56,9 @@ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + + #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ +- "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}" ++ "|mcpu=cortex-a7"\ ++ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\ ++ ":%{!r:--be8}}}" + + /* Tell the assembler to build BPABI binaries. */ + #undef SUBTARGET_EXTRA_ASM_SPEC +--- a/src/gcc/config/arm/constraints.md ++++ b/src/gcc/config/arm/constraints.md +@@ -29,13 +29,14 @@ + ;; in Thumb-1 state: I, J, K, L, M, N, O - /* Tell the assembler to build BPABI binaries. */ - #undef SUBTARGET_EXTRA_ASM_SPEC ---- a/src/gcc/config/arm/constraints.md -+++ b/src/gcc/config/arm/constraints.md -@@ -31,11 +31,12 @@ ;; The following multi-letter normal constraints have been used: - ;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz +-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz ++;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz ;; in Thumb-1 state: Pa, Pb, Pc, Pd -;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px +;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py @@ -11279,7 +11591,20 @@ (define_constraint "G" "In ARM/Thumb-2 state a valid FPA immediate constant." 
(and (match_code "const_double") -@@ -327,6 +345,27 @@ +@@ -273,6 +291,12 @@ + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)"))) + ++(define_constraint "Dt" ++ "@internal ++ In ARM/ Thumb2 a const_double which can be used with a vcvt.f32.s32 with fract bits operation" ++ (and (match_code "const_double") ++ (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) ++ + (define_memory_constraint "Ut" + "@internal + In ARM/Thumb-2 state an address valid for loading/storing opaque structure +@@ -327,6 +351,27 @@ (and (match_code "mem") (match_test "REG_P (XEXP (op, 0))"))) @@ -11983,17 +12308,45 @@ SYNC_LOCK_RELEASE (int, 4) SYNC_LOCK_RELEASE (short, 2) SYNC_LOCK_RELEASE (char, 1) ---- a/src/gcc/config/arm/neon-testgen.ml -+++ b/src/gcc/config/arm/neon-testgen.ml -@@ -177,7 +177,7 @@ - let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in - "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}" - | (PtrTo elt | CstPtrTo elt) -> -- "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\\\\\]" -+ "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]" - | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" - | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" - | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" +--- a/src/gcc/config/arm/linux-eabi.h ++++ b/src/gcc/config/arm/linux-eabi.h +@@ -32,7 +32,8 @@ + while (false) + + /* We default to a soft-float ABI so that binaries can run on all +- target hardware. */ ++ target hardware. If you override this to use the hard-float ABI then ++ change the setting of GLIBC_DYNAMIC_LINKER_DEFAULT as well. 
*/ + #undef TARGET_DEFAULT_FLOAT_ABI + #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT + +@@ -59,10 +60,23 @@ + #undef SUBTARGET_EXTRA_LINK_SPEC + #define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION + +-/* Use ld-linux.so.3 so that it will be possible to run "classic" +- GNU/Linux binaries on an EABI system. */ ++/* GNU/Linux on ARM currently supports three dynamic linkers: ++ - ld-linux.so.2 - for the legacy ABI ++ - ld-linux.so.3 - for the EABI-derived soft-float ABI ++ - ld-linux-armhf.so.3 - for the EABI-derived hard-float ABI. ++ All the dynamic linkers live in /lib. ++ We default to soft-float, but this can be overridden by changing both ++ GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI. */ ++ + #undef GLIBC_DYNAMIC_LINKER +-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.3" ++#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3" ++#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3" ++#define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT ++ ++#define GLIBC_DYNAMIC_LINKER \ ++ "%{mfloat-abi=hard:" GLIBC_DYNAMIC_LINKER_HARD_FLOAT "} \ ++ %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ ++ %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" + + /* At this point, bpabi.h will have clobbered LINK_SPEC. We want to + use the GNU/Linux version, not the generic BPABI version. 
*/ --- a/src/gcc/config/arm/neon.md +++ b/src/gcc/config/arm/neon.md @@ -783,30 +783,57 @@ @@ -13557,6 +13910,17 @@ + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) +--- a/src/gcc/config/arm/neon-testgen.ml ++++ b/src/gcc/config/arm/neon-testgen.ml +@@ -177,7 +177,7 @@ + let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in + "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}" + | (PtrTo elt | CstPtrTo elt) -> +- "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\\\\\]" ++ "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]" + | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" --- a/src/gcc/config/arm/predicates.md +++ b/src/gcc/config/arm/predicates.md @@ -129,11 +129,18 @@ @@ -13578,7 +13942,7 @@ (define_predicate "arm_add_operand" (ior (match_operand 0 "arm_rhs_operand") (match_operand 0 "arm_neg_immediate_operand"))) -@@ -218,13 +225,20 @@ +@@ -218,13 +225,29 @@ (match_test "mode == GET_MODE (op)"))) ;; True for shift operators. @@ -13597,10 +13961,19 @@ + (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") + (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT + || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) ++ (match_test "mode == GET_MODE (op)"))) ++ ++;; True for shift operators which can be used with saturation instructions. ++(define_special_predicate "sat_shift_operator" ++ (and (ior (and (match_code "mult") ++ (match_test "power_of_two_operand (XEXP (op, 1), mode)")) ++ (and (match_code "ashift,ashiftrt") ++ (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT ++ && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1)) < 32)"))) (match_test "mode == GET_MODE (op)"))) ;; True for MULT, to identify which variant of shift_operator is in use. 
-@@ -241,11 +255,15 @@ +@@ -241,11 +264,15 @@ ;; True for integer comparisons and, if FP is active, for comparisons ;; other than LTGT or UNEQ. @@ -13620,7 +13993,7 @@ (define_special_predicate "lt_ge_comparison_operator" (match_code "lt,ge")) -@@ -289,8 +307,11 @@ +@@ -289,8 +316,11 @@ (define_special_predicate "arm_extendqisi_mem_op" (and (match_operand 0 "memory_operand") @@ -13634,7 +14007,7 @@ (define_special_predicate "arm_reg_or_extendqisi_mem_op" (ior (match_operand 0 "arm_extendqisi_mem_op") -@@ -585,6 +606,26 @@ +@@ -585,6 +615,26 @@ return neon_immediate_valid_for_move (op, mode, NULL, NULL); }) @@ -13661,10 +14034,15 @@ (define_predicate "imm_for_neon_logic_operand" (match_code "const_vector") { -@@ -684,5 +725,9 @@ +@@ -684,5 +734,14 @@ return true; }) ++(define_predicate "const_double_vcvt_power_of_two_reciprocal" ++ (and (match_code "const_double") ++ (match_test "TARGET_32BIT && TARGET_VFP ++ && vfp3_const_double_for_fract_bits (op)"))) ++ +(define_special_predicate "neon_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) @@ -14343,6 +14721,43 @@ { return arm_output_sync_insn (insn, operands); } +@@ -600,3 +494,36 @@ + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")]) + ++(define_expand "sync_lock_releasedi" ++ [(match_operand:DI 0 "memory_operand") ++ (match_operand:DI 1 "s_register_operand")] ++ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER" ++ { ++ struct arm_sync_generator generator; ++ rtx tmp1 = gen_reg_rtx (DImode); ++ generator.op = arm_sync_generator_omn; ++ generator.u.omn = gen_arm_sync_lock_releasedi; ++ arm_expand_sync (DImode, &generator, operands[1], operands[0], NULL, tmp1); ++ DONE; ++ } ++) ++ ++(define_insn "arm_sync_lock_releasedi" ++ [(set (match_operand:DI 2 "s_register_operand" "=&r") ++ (unspec_volatile:DI [(match_operand:DI 1 "arm_sync_memory_operand" "+Q") ++ (match_operand:DI 0 "s_register_operand" "r")] ++ 
VUNSPEC_SYNC_RELEASE)) ++ (clobber (reg:CC CC_REGNUM)) ++ (clobber (match_scratch:SI 3 "=&r"))] ++ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER" ++ { ++ return arm_output_sync_insn (insn, operands); ++ } ++ [(set_attr "sync_memory" "1") ++ (set_attr "sync_result" "2") ++ (set_attr "sync_t1" "2") ++ (set_attr "sync_t2" "3") ++ (set_attr "sync_new_value" "0") ++ (set_attr "conds" "clob") ++ (set_attr "predicable" "no")] ++) --- a/src/gcc/config/arm/t-arm +++ b/src/gcc/config/arm/t-arm @@ -31,6 +31,16 @@ @@ -14362,13 +14777,6 @@ $(srcdir)/config/arm/cirrus.md \ $(srcdir)/config/arm/fpa.md \ $(srcdir)/config/arm/vec-common.md \ ---- a/src/gcc/config/arm/t-linux-eabi -+++ b/src/gcc/config/arm/t-linux-eabi -@@ -36,3 +36,4 @@ - EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o - - LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c -+LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c --- a/src/gcc/config/arm/thumb2.md +++ b/src/gcc/config/arm/thumb2.md @@ -207,7 +207,9 @@ @@ -14382,7 +14790,7 @@ "@ mov%?\\t%0, %1\\t%@ movhi movw%?\\t%0, %L1\\t%@ movhi -@@ -779,26 +781,6 @@ +@@ -780,26 +782,6 @@ (set_attr "length" "2")] ) @@ -14409,7 +14817,7 @@ (define_insn "*thumb2_subsi_short" [(set (match_operand:SI 0 "low_register_operand" "=l") (minus:SI (match_operand:SI 1 "low_register_operand" "l") -@@ -836,7 +818,7 @@ +@@ -837,7 +819,7 @@ "operands[4] = GEN_INT (- INTVAL (operands[2]));" ) @@ -14418,7 +14826,7 @@ [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r") -@@ -1118,3 +1100,54 @@ +@@ -1119,3 +1101,54 @@ " operands[2] = GEN_INT (32 - INTVAL (operands[2])); ") @@ -14473,6 +14881,13 @@ + FAIL; +}") + +--- a/src/gcc/config/arm/t-linux-eabi ++++ b/src/gcc/config/arm/t-linux-eabi +@@ -41,3 +41,4 @@ + EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o + + LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c 
++LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c --- a/src/gcc/config/arm/unwind-arm.c +++ b/src/gcc/config/arm/unwind-arm.c @@ -32,13 +32,18 @@ @@ -14643,515 +15058,68 @@ ) +@@ -1131,9 +1131,40 @@ + (set_attr "type" "fcmpd")] + ) + ++;; Fixed point to floating point conversions. ++(define_code_iterator FCVT [unsigned_float float]) ++(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) ++ ++(define_insn "*combine_vcvt_f32_" ++ [(set (match_operand:SF 0 "s_register_operand" "=t") ++ (mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0")) ++ (match_operand 2 ++ "const_double_vcvt_power_of_two_reciprocal" "Dt")))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" ++ "vcvt.f32.\\t%0, %1, %v2" ++ [(set_attr "predicable" "no") ++ (set_attr "type" "f_cvt")] ++) ++ ++;; Not the ideal way of implementing this. Ideally we would be able to split ++;; this into a move to a DP register and then a vcvt.f64.i32 ++(define_insn "*combine_vcvt_f64_" ++ [(set (match_operand:DF 0 "s_register_operand" "=x,x,w") ++ (mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r")) ++ (match_operand 2 ++ "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))] ++ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math ++ && !TARGET_VFP_SINGLE" ++ "@ ++ vmov.f32\\t%0, %1\;vcvt.f64.\\t%P0, %P0, %v2 ++ vmov.f32\\t%0, %1\;vcvt.f64.\\t%P0, %P0, %v2 ++ vmov.f64\\t%P0, %1, %1\; vcvt.f64.\\t%P0, %P0, %v2" ++ [(set_attr "predicable" "no") ++ (set_attr "type" "f_cvt") ++ (set_attr "length" "8")] ++) + + ;; Store multiple insn used in function prologue. 
+- + (define_insn "*push_multi_vfp" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") --- a/src/gcc/config/arm/x-arm +++ b/src/gcc/config/arm/x-arm @@ -0,0 +1,3 @@ +driver-arm.o: $(srcdir)/config/arm/driver-arm.c \ + $(CONFIG_H) $(SYSTEM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< ---- a/src/gcc/config/darwin.c -+++ b/src/gcc/config/darwin.c -@@ -1753,19 +1753,51 @@ - return (!strncmp ((const char *)p, "_OBJC_", 6)); - } - --/* LTO support for Mach-O. */ -+/* LTO support for Mach-O. - --/* Section names for LTO sections. */ --static unsigned int lto_section_names_offset = 0; -+ This version uses three mach-o sections to encapsulate the (unlimited -+ number of) lto sections. - --/* This is the obstack which we use to allocate the many strings. */ --static struct obstack lto_section_names_obstack; -+ __GNU_LTO, __lto_sections contains the concatented GNU LTO section data. -+ __GNU_LTO, __section_names contains the GNU LTO section names. -+ __GNU_LTO, __section_index contains an array of values that index these. -+ -+ Indexed thus: -+
, -+
-+ . -+ -+ At present, for both m32 and m64 mach-o files each of these fields is -+ represented by a uint32_t. This is because, AFAICT, a mach-o object -+ cannot exceed 4Gb because the section_64 offset field (see below) is 32bits. -+ -+ uint32_t offset; -+ "offset An integer specifying the offset to this section in the file." */ -+ -+/* Count lto section numbers. */ -+static unsigned int lto_section_num = 0; -+ -+/* A vector of information about LTO sections, at present, we only have -+ the name. TODO: see if we can get the data length somehow. */ -+typedef struct GTY (()) darwin_lto_section_e { -+ const char *sectname; -+} darwin_lto_section_e ; -+DEF_VEC_O(darwin_lto_section_e); -+DEF_VEC_ALLOC_O(darwin_lto_section_e, gc); - --/* Segment name for LTO sections. */ -+static GTY (()) VEC (darwin_lto_section_e, gc) * lto_section_names; -+ -+/* Segment for LTO data. */ - #define LTO_SEGMENT_NAME "__GNU_LTO" - --/* Section name for LTO section names section. */ --#define LTO_NAMES_SECTION "__section_names" -+/* Section wrapper scheme (used here to wrap the unlimited number of LTO -+ sections into three Mach-O ones). -+ NOTE: These names MUST be kept in sync with those in -+ libiberty/simple-object-mach-o. */ -+#define LTO_SECTS_SECTION "__wrapper_sects" -+#define LTO_NAMES_SECTION "__wrapper_names" -+#define LTO_INDEX_SECTION "__wrapper_index" - - /* File to temporarily store LTO data. This is appended to asm_out_file - in darwin_end_file. */ -@@ -1808,37 +1840,38 @@ - unsigned int flags, - tree decl ATTRIBUTE_UNUSED) - { -- /* LTO sections go in a special segment __GNU_LTO. We want to replace the -- section name with something we can use to represent arbitrary-length -- names (section names in Mach-O are at most 16 characters long). */ -+ /* LTO sections go in a special section that encapsulates the (unlimited) -+ number of GNU LTO sections within a single mach-o one. 
*/ - if (strncmp (name, LTO_SECTION_NAME_PREFIX, - strlen (LTO_SECTION_NAME_PREFIX)) == 0) - { -+ darwin_lto_section_e e; - /* We expect certain flags to be set... */ - gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED)) - == (SECTION_DEBUG | SECTION_NAMED)); - -- /* Add the section name to the things to output when we end the -- current assembler output file. -- This is all not very efficient, but that doesn't matter -- this -- shouldn't be a hot path in the compiler... */ -- obstack_1grow (<o_section_names_obstack, '\t'); -- obstack_grow (<o_section_names_obstack, ".ascii ", 7); -- obstack_1grow (<o_section_names_obstack, '"'); -- obstack_grow (<o_section_names_obstack, name, strlen (name)); -- obstack_grow (<o_section_names_obstack, "\\0\"\n", 4); -- -- /* Output the dummy section name. */ -- fprintf (asm_out_file, "\t# %s\n", name); -- fprintf (asm_out_file, "\t.section %s,__%08X,regular,debug\n", -- LTO_SEGMENT_NAME, lto_section_names_offset); -- -- /* Update the offset for the next section name. Make sure we stay -- within reasonable length. */ -- lto_section_names_offset += strlen (name) + 1; -- gcc_assert (lto_section_names_offset > 0 -- && lto_section_names_offset < ((unsigned) 1 << 31)); -- } -+ /* Switch to our combined section. */ -+ fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", -+ LTO_SEGMENT_NAME, LTO_SECTS_SECTION); -+ /* Output a label for the start of this sub-section. */ -+ fprintf (asm_out_file, "L_GNU_LTO%d:\t;# %s\n", -+ lto_section_num, name); -+ /* We have to jump through hoops to get the values of the intra-section -+ offsets... */ -+ fprintf (asm_out_file, "\t.set L$gnu$lto$offs%d,L_GNU_LTO%d-L_GNU_LTO0\n", -+ lto_section_num, lto_section_num); -+ fprintf (asm_out_file, -+ "\t.set L$gnu$lto$size%d,L_GNU_LTO%d-L_GNU_LTO%d\n", -+ lto_section_num, lto_section_num+1, lto_section_num); -+ lto_section_num++; -+ e.sectname = xstrdup (name); -+ /* Keep the names, we'll need to make a table later. 
-+ TODO: check that we do not revisit sections, that would break -+ the assumption of how this is done. */ -+ if (lto_section_names == NULL) -+ lto_section_names = VEC_alloc (darwin_lto_section_e, gc, 16); -+ VEC_safe_push (darwin_lto_section_e, gc, lto_section_names, &e); -+ } - else if (strncmp (name, "__DWARF,", 8) == 0) - darwin_asm_dwarf_section (name, flags, decl); - else -@@ -2711,16 +2744,12 @@ - darwin_asm_output_dwarf_delta (file, size, lab, sname); - } - --/* Called from the within the TARGET_ASM_FILE_START for each target. -- Initialize the stuff we need for LTO long section names support. */ -+/* Called from the within the TARGET_ASM_FILE_START for each target. */ - - void - darwin_file_start (void) - { -- /* We fill this obstack with the complete section text for the lto section -- names to write in darwin_file_end. */ -- obstack_init (<o_section_names_obstack); -- lto_section_names_offset = 0; -+ /* Nothing to do. */ - } - - /* Called for the TARGET_ASM_FILE_END hook. -@@ -2731,8 +2760,6 @@ - void - darwin_file_end (void) - { -- const char *lto_section_names; -- - machopic_finish (asm_out_file); - if (strcmp (lang_hooks.name, "GNU C++") == 0) - { -@@ -2762,6 +2789,13 @@ - lto_asm_txt = buf = (char *) xmalloc (n + 1); - while (fgets (lto_asm_txt, n, lto_asm_out_file)) - fputs (lto_asm_txt, asm_out_file); -+ /* Put a termination label. */ -+ fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", -+ LTO_SEGMENT_NAME, LTO_SECTS_SECTION); -+ fprintf (asm_out_file, "L_GNU_LTO%d:\t;# end of lto\n", -+ lto_section_num); -+ /* Make sure our termination label stays in this section. */ -+ fputs ("\t.space\t1\n", asm_out_file); - } - - /* Remove the temporary file. */ -@@ -2770,21 +2804,50 @@ - free (lto_asm_out_name); - } - -- /* Finish the LTO section names obstack. Don't output anything if -- there are no recorded section names. 
*/ -- obstack_1grow (<o_section_names_obstack, '\0'); -- lto_section_names = XOBFINISH (<o_section_names_obstack, const char *); -- if (strlen (lto_section_names) > 0) -+ /* Output the names and indices. */ -+ if (lto_section_names && VEC_length (darwin_lto_section_e, lto_section_names)) - { -- fprintf (asm_out_file, -- "\t.section %s,%s,regular,debug\n", -+ int count; -+ darwin_lto_section_e *ref; -+ /* For now, we'll make the offsets 4 bytes and unaligned - we'll fix -+ the latter up ourselves. */ -+ const char *op = integer_asm_op (4,0); -+ -+ /* Emit the names. */ -+ fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", - LTO_SEGMENT_NAME, LTO_NAMES_SECTION); -- fprintf (asm_out_file, -- "\t# Section names in %s are offsets into this table\n", -- LTO_SEGMENT_NAME); -- fprintf (asm_out_file, "%s\n", lto_section_names); -+ FOR_EACH_VEC_ELT (darwin_lto_section_e, lto_section_names, count, ref) -+ { -+ fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\n", count); -+ /* We have to jump through hoops to get the values of the intra-section -+ offsets... */ -+ fprintf (asm_out_file, -+ "\t.set L$gnu$lto$noff%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME0\n", -+ count, count); -+ fprintf (asm_out_file, -+ "\t.set L$gnu$lto$nsiz%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME%d\n", -+ count, count+1, count); -+ fprintf (asm_out_file, "\t.asciz\t\"%s\"\n", ref->sectname); -+ } -+ fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\t;# end\n", lto_section_num); -+ /* make sure our termination label stays in this section. */ -+ fputs ("\t.space\t1\n", asm_out_file); -+ -+ /* Emit the Index. 
*/ -+ fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", -+ LTO_SEGMENT_NAME, LTO_INDEX_SECTION); -+ fputs ("\t.align\t2\n", asm_out_file); -+ fputs ("# Section offset, Section length, Name offset, Name length\n", -+ asm_out_file); -+ FOR_EACH_VEC_ELT (darwin_lto_section_e, lto_section_names, count, ref) -+ { -+ fprintf (asm_out_file, "%s L$gnu$lto$offs%d\t;# %s\n", -+ op, count, ref->sectname); -+ fprintf (asm_out_file, "%s L$gnu$lto$size%d\n", op, count); -+ fprintf (asm_out_file, "%s L$gnu$lto$noff%d\n", op, count); -+ fprintf (asm_out_file, "%s L$gnu$lto$nsiz%d\n", op, count); -+ } - } -- obstack_free (<o_section_names_obstack, NULL); - - /* If we have section anchors, then we must prevent the linker from - re-arranging data. */ --- a/src/gcc/config/host-linux.c +++ b/src/gcc/config/host-linux.c -@@ -84,6 +84,8 @@ - # define TRY_EMPTY_VM_SPACE 0x60000000 +@@ -85,7 +85,7 @@ #elif defined(__mc68000__) # define TRY_EMPTY_VM_SPACE 0x40000000 -+#elif defined(__ARM_EABI__) + #elif defined(__ARM_EABI__) +-# define TRY_EMPTY_VM_SPACE 0x60000000 +# define TRY_EMPTY_VM_SPACE 0x60000000 #else # define TRY_EMPTY_VM_SPACE 0 #endif ---- a/src/gcc/config/i386/i386.c -+++ b/src/gcc/config/i386/i386.c -@@ -16329,7 +16329,6 @@ - basic_block bb = BLOCK_FOR_INSN (insn); - int distance = 0; - df_ref *def_rec; -- enum attr_type insn_type; - - if (insn != BB_HEAD (bb)) - { -@@ -16345,8 +16344,8 @@ - && (regno1 == DF_REF_REGNO (*def_rec) - || regno2 == DF_REF_REGNO (*def_rec))) - { -- insn_type = get_attr_type (prev); -- if (insn_type != TYPE_LEA) -+ if (recog_memoized (prev) < 0 -+ || get_attr_type (prev) != TYPE_LEA) - goto done; - } - } -@@ -16385,8 +16384,8 @@ - && (regno1 == DF_REF_REGNO (*def_rec) - || regno2 == DF_REF_REGNO (*def_rec))) - { -- insn_type = get_attr_type (prev); -- if (insn_type != TYPE_LEA) -+ if (recog_memoized (prev) < 0 -+ || get_attr_type (prev) != TYPE_LEA) - goto done; - } - } ---- a/src/gcc/config/i386/i386.md -+++ b/src/gcc/config/i386/i386.md 
-@@ -5103,7 +5103,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P (operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(set (match_dup 0) (float:MODEF (match_dup 1)))]) - - (define_split -@@ -5116,7 +5116,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P (operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (float:MODEF (match_dup 2)))]) - -@@ -5207,7 +5207,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P (operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(const_int 0)] - { - rtx op1 = operands[1]; -@@ -5248,7 +5248,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P (operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(const_int 0)] - { - operands[3] = simplify_gen_subreg (mode, operands[0], -@@ -5270,7 +5270,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P (operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(const_int 0)] - { - rtx op1 = operands[1]; -@@ -5314,7 +5314,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P (operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(const_int 0)] - { - operands[3] = simplify_gen_subreg (mode, operands[0], -@@ -5375,7 +5375,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P (operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(set (match_dup 0) (float:MODEF (match_dup 1)))]) - - (define_insn "*float2_sse_nointerunit" -@@ -5410,7 +5410,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P 
(operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (float:MODEF (match_dup 2)))]) - -@@ -5423,7 +5423,7 @@ - && reload_completed - && (SSE_REG_P (operands[0]) - || (GET_CODE (operands[0]) == SUBREG -- && SSE_REG_P (operands[0])))" -+ && SSE_REG_P (SUBREG_REG (operands[0]))))" - [(set (match_dup 0) (float:MODEF (match_dup 1)))]) - - (define_insn "*float2_i387_with_temp" -@@ -14625,7 +14625,7 @@ - emit_insn (gen_sse4_1_round2 - (operands[0], operands[1], GEN_INT (0x04))); - else -- ix86_expand_rint (operand0, operand1); -+ ix86_expand_rint (operands[0], operands[1]); - } - else - { -@@ -14649,9 +14649,9 @@ - if (optimize_insn_for_size_p ()) - FAIL; - if (TARGET_64BIT || (mode != DFmode)) -- ix86_expand_round (operand0, operand1); -+ ix86_expand_round (operands[0], operands[1]); - else -- ix86_expand_rounddf_32 (operand0, operand1); -+ ix86_expand_rounddf_32 (operands[0], operands[1]); - DONE; - }) - -@@ -14796,7 +14796,7 @@ - { - if (optimize_insn_for_size_p ()) - FAIL; -- ix86_expand_lround (operand0, operand1); -+ ix86_expand_lround (operands[0], operands[1]); - DONE; - }) - -@@ -14871,9 +14871,9 @@ - emit_insn (gen_sse4_1_round2 - (operands[0], operands[1], GEN_INT (0x01))); - else if (TARGET_64BIT || (mode != DFmode)) -- ix86_expand_floorceil (operand0, operand1, true); -+ ix86_expand_floorceil (operands[0], operands[1], true); - else -- ix86_expand_floorceildf_32 (operand0, operand1, true); -+ ix86_expand_floorceildf_32 (operands[0], operands[1], true); - } - else - { -@@ -15053,7 +15053,7 @@ - { - if (TARGET_64BIT && optimize_insn_for_size_p ()) - FAIL; -- ix86_expand_lfloorceil (operand0, operand1, true); -+ ix86_expand_lfloorceil (operands[0], operands[1], true); - DONE; - }) - -@@ -15128,9 +15128,9 @@ - else if (optimize_insn_for_size_p ()) - FAIL; - else if (TARGET_64BIT || (mode != DFmode)) -- ix86_expand_floorceil (operand0, operand1, false); -+ ix86_expand_floorceil (operands[0], 
operands[1], false); - else -- ix86_expand_floorceildf_32 (operand0, operand1, false); -+ ix86_expand_floorceildf_32 (operands[0], operands[1], false); - } - else - { -@@ -15308,7 +15308,7 @@ - "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH - && !flag_trapping_math" - { -- ix86_expand_lfloorceil (operand0, operand1, false); -+ ix86_expand_lfloorceil (operands[0], operands[1], false); - DONE; - }) - -@@ -15383,9 +15383,9 @@ - else if (optimize_insn_for_size_p ()) - FAIL; - else if (TARGET_64BIT || (mode != DFmode)) -- ix86_expand_trunc (operand0, operand1); -+ ix86_expand_trunc (operands[0], operands[1]); - else -- ix86_expand_truncdf_32 (operand0, operand1); -+ ix86_expand_truncdf_32 (operands[0], operands[1]); - } - else - { -@@ -18285,8 +18285,8 @@ - (match_operand:SI 3 "const_int_operand" "i")] - UNSPECV_LWPVAL_INTRINSIC)] - "TARGET_LWP" -- "/* Avoid unused variable warning. */ -- (void) operand0;") -+ ;; Avoid unused variable warning. -+ "(void) operands[0];") - - (define_insn "*lwp_lwpval3_1" - [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r") ---- a/src/gcc/config/i386/sse.md -+++ b/src/gcc/config/i386/sse.md -@@ -4521,15 +4521,14 @@ - [(set (match_operand:V4DF 0 "register_operand" "=x,x") - (vec_select:V4DF - (vec_concat:V8DF -- (match_operand:V4DF 1 "nonimmediate_operand" "xm,x") -- (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm")) -+ (match_operand:V4DF 1 "nonimmediate_operand" " x,m") -+ (match_operand:V4DF 2 "nonimmediate_operand" "xm,1")) - (parallel [(const_int 0) (const_int 4) - (const_int 2) (const_int 6)])))] -- "TARGET_AVX -- && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))" -+ "TARGET_AVX" - "@ -- vmovddup\t{%1, %0|%0, %1} -- vunpcklpd\t{%2, %1, %0|%0, %1, %2}" -+ vunpcklpd\t{%2, %1, %0|%0, %1, %2} -+ vmovddup\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog") - (set_attr "prefix" "vex") - (set_attr "mode" "V4DF")]) ---- a/src/gcc/config/pa/pa.c -+++ b/src/gcc/config/pa/pa.c -@@ -1863,6 +1863,11 @@ - /* Handle the 
most common case: storing into a register. */ - else if (register_operand (operand0, mode)) - { -+ /* Legitimize TLS symbol references. This happens for references -+ that aren't a legitimate constant. */ -+ if (PA_SYMBOL_REF_TLS_P (operand1)) -+ operand1 = legitimize_tls_address (operand1); -+ - if (register_operand (operand1, mode) - || (GET_CODE (operand1) == CONST_INT - && cint_ok_for_move (INTVAL (operand1))) ---- a/src/gcc/config/pa/pa.h -+++ b/src/gcc/config/pa/pa.h -@@ -848,6 +848,9 @@ - && (NEW_HP_ASSEMBLER \ - || TARGET_GAS \ - || GET_CODE (X) != LABEL_REF) \ -+ && (!PA_SYMBOL_REF_TLS_P (X) \ -+ || (SYMBOL_REF_TLS_MODEL (X) != TLS_MODEL_GLOBAL_DYNAMIC \ -+ && SYMBOL_REF_TLS_MODEL (X) != TLS_MODEL_LOCAL_DYNAMIC)) \ - && (!TARGET_64BIT \ - || GET_CODE (X) != CONST_DOUBLE) \ - && (!TARGET_64BIT \ --- a/src/gcc/config/rs6000/rs6000.c +++ b/src/gcc/config/rs6000/rs6000.c -@@ -5134,7 +5134,9 @@ +@@ -5146,7 +5146,9 @@ for (i = 0; i < n_elts; ++i) { x = XVECEXP (vals, 0, i); @@ -15162,7 +15130,7 @@ ++n_var; } if (n_var == 0) -@@ -5286,7 +5288,9 @@ +@@ -5298,7 +5300,9 @@ for (i = 0; i < n_elts; ++i) { x = XVECEXP (vals, 0, i); @@ -15173,178 +15141,38 @@ ++n_var, one_var = i; else if (x != CONST0_RTX (inner_mode)) all_const_zero = false; -@@ -6824,6 +6828,7 @@ - #if TARGET_MACHO - && DEFAULT_ABI == ABI_DARWIN - && (flag_pic || MACHO_DYNAMIC_NO_PIC_P) -+ && machopic_symbol_defined_p (x) - #else - && DEFAULT_ABI == ABI_V4 - && !flag_pic -@@ -20227,7 +20232,7 @@ - { - /* This blockage is needed so that sched doesn't decide to move - the sp change before the register restores. 
*/ -- if (frame_reg_rtx != sp_reg_rtx -+ if (DEFAULT_ABI == ABI_V4 - || (TARGET_SPE_ABI - && info->spe_64bit_regs_used != 0 - && info->first_gp_reg_save != 32)) ---- a/src/gcc/config/sparc/sparc.h -+++ b/src/gcc/config/sparc/sparc.h -@@ -408,6 +408,7 @@ - %{mcpu=sparclite:-Asparclite} \ - %{mcpu=sparclite86x:-Asparclite} \ - %{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \ -+%{mcpu=v8:-Av8} \ - %{mv8plus:-Av8plus} \ - %{mcpu=v9:-Av9} \ - %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \ ---- a/src/gcc/config/sparc/sparc.md -+++ b/src/gcc/config/sparc/sparc.md -@@ -1813,8 +1813,8 @@ - }) - - (define_insn "*movsf_insn" -- [(set (match_operand:V32 0 "nonimmediate_operand" "=d,f,*r,*r,*r,f,*r,m,m") -- (match_operand:V32 1 "input_operand" "GY,f,*rRY,Q,S,m,m,f,*rGY"))] -+ [(set (match_operand:V32 0 "nonimmediate_operand" "=d,f, *r,*r,*r,f,*r,m, m") -+ (match_operand:V32 1 "input_operand" "GY,f,*rRY, Q, S,m, m,f,*rGY"))] - "TARGET_FPU - && (register_operand (operands[0], mode) - || register_or_zero_operand (operands[1], mode))" -@@ -1861,8 +1861,8 @@ - ;; when -mno-fpu. - - (define_insn "*movsf_insn_no_fpu" -- [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r,r,m") -- (match_operand:SF 1 "input_operand" "rR,Q,S,m,rG"))] -+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r,r, m") -+ (match_operand:SF 1 "input_operand" "rR,Q,S,m,rG"))] - "! TARGET_FPU - && (register_operand (operands[0], SFmode) - || register_or_zero_operand (operands[1], SFmode))" -@@ -1948,8 +1948,8 @@ - - ;; Be careful, fmovd does not exist when !v9. - (define_insn "*movdf_insn_sp32" -- [(set (match_operand:DF 0 "nonimmediate_operand" "=e,W,U,T,o,e,*r,o,e,o") -- (match_operand:DF 1 "input_operand" "W#F,e,T,U,G,e,*rFo,*r,o#F,e"))] -+ [(set (match_operand:DF 0 "nonimmediate_operand" "= e,W,U,T,o,e, *r, o, e,o") -+ (match_operand:DF 1 "input_operand" "W#F,e,T,U,G,e,*rFo,*r,o#F,e"))] - "TARGET_FPU - && ! 
TARGET_V9 - && (register_operand (operands[0], DFmode) -@@ -1969,8 +1969,8 @@ - (set_attr "length" "*,*,*,*,2,2,2,2,2,2")]) - - (define_insn "*movdf_insn_sp32_no_fpu" -- [(set (match_operand:DF 0 "nonimmediate_operand" "=U,T,o,r,o") -- (match_operand:DF 1 "input_operand" "T,U,G,ro,r"))] -+ [(set (match_operand:DF 0 "nonimmediate_operand" "=U,T,o, r,o") -+ (match_operand:DF 1 "input_operand" " T,U,G,ro,r"))] - "! TARGET_FPU - && ! TARGET_V9 - && (register_operand (operands[0], DFmode) -@@ -1986,8 +1986,8 @@ - - ;; We have available v9 double floats but not 64-bit integer registers. - (define_insn "*movdf_insn_sp32_v9" -- [(set (match_operand:V64 0 "nonimmediate_operand" "=b,e,e,T,W,U,T,f,*r,o") -- (match_operand:V64 1 "input_operand" "GY,e,W#F,GY,e,T,U,o#F,*roGYDF,*rGYf"))] -+ [(set (match_operand:V64 0 "nonimmediate_operand" "=b,e, e, T,W,U,T, f, *r, o") -+ (match_operand:V64 1 "input_operand" "GY,e,W#F,GY,e,T,U,o#F,*roFD,*rGYf"))] - "TARGET_FPU - && TARGET_V9 - && ! TARGET_ARCH64 -@@ -2009,8 +2009,8 @@ - (set_attr "fptype" "double,double,*,*,*,*,*,*,*,*")]) - - (define_insn "*movdf_insn_sp32_v9_no_fpu" -- [(set (match_operand:DF 0 "nonimmediate_operand" "=U,T,T,r,o") -- (match_operand:DF 1 "input_operand" "T,U,G,ro,rG"))] -+ [(set (match_operand:DF 0 "nonimmediate_operand" "=U,T,T, r, o") -+ (match_operand:DF 1 "input_operand" " T,U,G,ro,rG"))] - "! TARGET_FPU - && TARGET_V9 - && ! TARGET_ARCH64 -@@ -2027,8 +2027,8 @@ - - ;; We have available both v9 double floats and 64-bit integer registers. 
- (define_insn "*movdf_insn_sp64" -- [(set (match_operand:V64 0 "nonimmediate_operand" "=b,e,e,W,*r,*r,m,*r") -- (match_operand:V64 1 "input_operand" "GY,e,W#F,e,*rGY,m,*rGY,DF"))] -+ [(set (match_operand:V64 0 "nonimmediate_operand" "=b,e, e,W, *r,*r, m,*r") -+ (match_operand:V64 1 "input_operand" "GY,e,W#F,e,*rGY, m,*rGY,FD"))] - "TARGET_FPU - && TARGET_ARCH64 - && (register_operand (operands[0], mode) -@@ -2047,8 +2047,8 @@ - (set_attr "fptype" "double,double,*,*,*,*,*,*")]) - - (define_insn "*movdf_insn_sp64_no_fpu" -- [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m") -- (match_operand:DF 1 "input_operand" "r,m,rG"))] -+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r, m") -+ (match_operand:DF 1 "input_operand" "r,m,rG"))] - "! TARGET_FPU - && TARGET_ARCH64 - && (register_operand (operands[0], DFmode) -@@ -2288,8 +2288,8 @@ - }) - - (define_insn "*movtf_insn_sp32" -- [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,o,U,r") -- (match_operand:TF 1 "input_operand" "G,oe,GeUr,o,roG"))] -+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e, o,U, r") -+ (match_operand:TF 1 "input_operand" " G,oe,GeUr,o,roG"))] - "TARGET_FPU - && ! TARGET_ARCH64 - && (register_operand (operands[0], TFmode) -@@ -2302,8 +2302,8 @@ - ;; when -mno-fpu. - - (define_insn "*movtf_insn_sp32_no_fpu" -- [(set (match_operand:TF 0 "nonimmediate_operand" "=o,U,o,r,o") -- (match_operand:TF 1 "input_operand" "G,o,U,roG,r"))] -+ [(set (match_operand:TF 0 "nonimmediate_operand" "=o,U,o, r,o") -+ (match_operand:TF 1 "input_operand" " G,o,U,roG,r"))] - "! TARGET_FPU - && ! 
TARGET_ARCH64 - && (register_operand (operands[0], TFmode) -@@ -2312,8 +2312,8 @@ - [(set_attr "length" "4")]) - - (define_insn "*movtf_insn_sp64" -- [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,o,r") -- (match_operand:TF 1 "input_operand" "G,oe,Ger,roG"))] -+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e, o, r") -+ (match_operand:TF 1 "input_operand" "G,oe,Ger,roG"))] - "TARGET_FPU - && TARGET_ARCH64 - && ! TARGET_HARD_QUAD -@@ -2323,8 +2323,8 @@ - [(set_attr "length" "2")]) - - (define_insn "*movtf_insn_sp64_hq" -- [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m,o,r") -- (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))] -+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m, o, r") -+ (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))] - "TARGET_FPU - && TARGET_ARCH64 - && TARGET_HARD_QUAD -@@ -2341,8 +2341,8 @@ - (set_attr "length" "2,*,*,*,2,2")]) - - (define_insn "*movtf_insn_sp64_no_fpu" -- [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o") -- (match_operand:TF 1 "input_operand" "orG,rG"))] -+ [(set (match_operand:TF 0 "nonimmediate_operand" "= r, o") -+ (match_operand:TF 1 "input_operand" "orG,rG"))] - "! TARGET_FPU - && TARGET_ARCH64 - && (register_operand (operands[0], TFmode) +--- a/src/gcc/config.gcc ++++ b/src/gcc/config.gcc +@@ -177,7 +177,12 @@ + # configure_default_options + # Set to an initializer for configure_default_options + # in configargs.h, based on --with-cpu et cetera. +- ++# ++# target_type_format_char ++# The default character to be used for formatting ++# the attribute in a ++# .type symbol_name, ${t_t_f_c} ++# directive. + # The following variables are used in each case-construct to build up the + # outgoing variables: + # +@@ -219,6 +224,7 @@ + target_gtfiles= + need_64bit_hwint= + need_64bit_isa= ++target_type_format_char='@' + + # Don't carry these over build->host->target. Please. 
+ xm_file= +@@ -312,6 +318,7 @@ + arm*-*-*) + cpu_type=arm + extra_headers="mmintrin.h arm_neon.h" ++ target_type_format_char='%' + c_target_objs="arm-c.o" + cxx_target_objs="arm-c.o" + ;; --- a/src/gcc/config.host +++ b/src/gcc/config.host @@ -100,6 +100,14 @@ @@ -15364,7 +15192,7 @@ alpha*-*-linux*) --- a/src/gcc/configure +++ b/src/gcc/configure -@@ -1647,7 +1647,8 @@ +@@ -1652,7 +1652,8 @@ use sysroot as the system root during the build --with-sysroot=DIR Search for usr/lib, usr/include, et al, within DIR. --with-specs=SPECS add SPECS to driver command-line processing @@ -15374,7 +15202,7 @@ --with-bugurl=URL Direct users to URL to report a bug --with-multilib-list Select multilibs (SH only) --with-gnu-ld assume the C compiler uses GNU ld default=no -@@ -7129,7 +7130,7 @@ +@@ -7161,7 +7162,7 @@ *) PKGVERSION="($withval) " ;; esac else @@ -15383,100 +15211,46 @@ fi -@@ -7442,17 +7443,7 @@ - RANLIB="$ac_cv_prog_RANLIB" - fi - --case "${host}" in --*-*-darwin*) -- # By default, the Darwin ranlib will not treat common symbols as -- # definitions when building the archive table of contents. Other -- # ranlibs do that; pass an option to the Darwin ranlib that makes -- # it behave similarly. -- ranlib_flags="-c" -- ;; --*) -- ranlib_flags="" --esac -+ranlib_flags="" - - - # Find a good install program. We prefer a C program (faster), -@@ -15740,7 +15731,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -16653,7 +16644,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -16671,7 +16662,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) -@@ -17505,7 +17496,7 @@ +@@ -17527,7 +17528,7 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF --#line 17508 "configure" -+#line 17499 "configure" +-#line 17530 "configure" ++#line 17531 "configure" #include "confdefs.h" #if HAVE_DLFCN_H -@@ -17611,7 +17602,7 @@ +@@ -17633,7 +17634,7 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF --#line 17614 "configure" -+#line 17605 "configure" +-#line 17636 "configure" ++#line 17637 "configure" #include "confdefs.h" #if HAVE_DLFCN_H -@@ -18537,7 +18528,7 @@ - esac - ;; - -- freebsd[12]*) -+ freebsd2.*) - # C++ shared libraries reported to be fairly broken before - # switch to ELF - ld_shlibs_CXX=no -@@ -20312,7 +20303,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -20330,7 +20321,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) +@@ -25259,7 +25260,7 @@ + then gcc_cv_as_gnu_unique_object=yes + fi + elif test x$gcc_cv_as != x; then +- echo '.type foo, @gnu_unique_object' > conftest.s ++ echo '.type foo, '$target_type_format_char'gnu_unique_object' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval 
echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 +@@ -25278,7 +25279,8 @@ + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_gnu_unique_object" >&5 + $as_echo "$gcc_cv_as_gnu_unique_object" >&6; } + if test $gcc_cv_as_gnu_unique_object = yes; then +- # Also check for ld.so support, i.e. glibc 2.11 or higher. ++ # We need to unquote above to to use the definition from config.gcc. ++# Also check for ld.so support, i.e. glibc 2.11 or higher. + if test x$host = x$build -a x$host = x$target && + ldd --version 2>/dev/null && + glibcver=`ldd --version 2>/dev/null | sed 's/.* //;q'`; then --- a/src/gcc/configure.ac +++ b/src/gcc/configure.ac -@@ -760,7 +760,7 @@ +@@ -782,7 +782,7 @@ ) AC_SUBST(CONFIGURE_SPECS) @@ -15485,139 +15259,16 @@ ACX_BUGURL([http://gcc.gnu.org/bugs.html]) # Sanity check enable_languages in case someone does not run the toplevel -@@ -807,17 +807,7 @@ - gcc_AC_PROG_LN_S - ACX_PROG_LN($LN_S) - AC_PROG_RANLIB --case "${host}" in --*-*-darwin*) -- # By default, the Darwin ranlib will not treat common symbols as -- # definitions when building the archive table of contents. Other -- # ranlibs do that; pass an option to the Darwin ranlib that makes -- # it behave similarly. -- ranlib_flags="-c" -- ;; --*) -- ranlib_flags="" --esac -+ranlib_flags="" - AC_SUBST(ranlib_flags) - - gcc_AC_PROG_INSTALL ---- a/src/gcc/cp/ChangeLog -+++ b/src/gcc/cp/ChangeLog -@@ -1,3 +1,32 @@ -+2011-11-22 Paolo Carlini -+ -+ PR c++/51265 -+ * semantics.c (finish_decltype_type): Handle PTRMEM_CST. -+ -+2011-11-18 Paolo Carlini -+ -+ PR c++/51150 -+ * pt.c (tsubst_copy_and_build): Handle FIX_TRUNC_EXPR. -+ -+2011-11-07 Jason Merrill -+ -+ PR c++/50870 -+ * pt.c (tsubst_copy): Handle NAMESPACE_DECL. -+ (tsubst_copy_and_build) [COMPONENT_REF]: Handle a still-dependent -+ object. -+ -+2011-11-04 Eric Botcazou -+ -+ PR c++/50608 -+ * semantics.c (finish_offsetof): Adjust call to fold_offsetof. 
-+ * typeck.c (cp_build_addr_expr_1): Call fold_offsetof_1. -+ -+2011-10-29 Paolo Carlini -+ -+ PR c++/50901 -+ * call.c (build_new_op_1): Handle ABS_EXPR together with the -+ other unary EXPR. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/gcc/cp/call.c -+++ b/src/gcc/cp/call.c -@@ -4996,6 +4996,7 @@ - case POSTDECREMENT_EXPR: - case REALPART_EXPR: - case IMAGPART_EXPR: -+ case ABS_EXPR: - return cp_build_unary_op (code, arg1, candidates != 0, complain); - - case ARRAY_REF: ---- a/src/gcc/cp/pt.c -+++ b/src/gcc/cp/pt.c -@@ -11439,6 +11439,9 @@ - mark_used (t); - return t; - -+ case NAMESPACE_DECL: -+ return t; -+ - case OVERLOAD: - /* An OVERLOAD will always be a non-dependent overload set; an - overload set from function scope will just be represented with an -@@ -12704,6 +12707,10 @@ - return build_x_unary_op (TREE_CODE (t), RECUR (TREE_OPERAND (t, 0)), - complain); - -+ case FIX_TRUNC_EXPR: -+ return cp_build_unary_op (FIX_TRUNC_EXPR, RECUR (TREE_OPERAND (t, 0)), -+ 0, complain); -+ - case ADDR_EXPR: - op1 = TREE_OPERAND (t, 0); - if (TREE_CODE (op1) == LABEL_DECL) -@@ -13179,7 +13186,9 @@ - if (member == error_mark_node) - return error_mark_node; - -- if (object_type && !CLASS_TYPE_P (object_type)) -+ if (type_dependent_expression_p (object)) -+ /* We can't do much here. */; -+ else if (!CLASS_TYPE_P (object_type)) - { - if (SCALAR_TYPE_P (object_type)) - { ---- a/src/gcc/cp/semantics.c -+++ b/src/gcc/cp/semantics.c -@@ -3348,7 +3348,7 @@ - } - if (TREE_CODE (expr) == INDIRECT_REF && REFERENCE_REF_P (expr)) - expr = TREE_OPERAND (expr, 0); -- return fold_offsetof (expr, NULL_TREE); -+ return fold_offsetof (expr); - } - - /* Replace the AGGR_INIT_EXPR at *TP with an equivalent CALL_EXPR. This -@@ -4927,8 +4927,9 @@ - gcc_unreachable (); - - case INTEGER_CST: -+ case PTRMEM_CST: - /* We can get here when the id-expression refers to an -- enumerator. */ -+ enumerator or non-type template parameter. 
*/ - type = TREE_TYPE (expr); - break; - ---- a/src/gcc/cp/typeck.c -+++ b/src/gcc/cp/typeck.c -@@ -4835,9 +4835,7 @@ - && TREE_CONSTANT (TREE_OPERAND (val, 0))) - { - tree type = build_pointer_type (argtype); -- tree op0 = fold_convert (type, TREE_OPERAND (val, 0)); -- tree op1 = fold_convert (sizetype, fold_offsetof (arg, val)); -- return fold_build2 (POINTER_PLUS_EXPR, type, op0, op1); -+ return fold_convert (type, fold_offsetof_1 (arg)); - } - - /* Handle complex lvalues (when permitted) +@@ -3945,7 +3945,8 @@ + esac], + [gcc_GAS_CHECK_FEATURE([gnu_unique_object], gcc_cv_as_gnu_unique_object, + [elf,2,19,52],, +- [.type foo, @gnu_unique_object],, ++ [.type foo, '$target_type_format_char'gnu_unique_object],, ++# We need to unquote above to to use the definition from config.gcc. + # Also check for ld.so support, i.e. glibc 2.11 or higher. + [[if test x$host = x$build -a x$host = x$target && + ldd --version 2>/dev/null && --- a/src/gcc/cp/typeck2.c +++ b/src/gcc/cp/typeck2.c @@ -479,18 +479,20 @@ @@ -15723,7 +15374,7 @@ TREE_READONLY (dest) = 0; --- a/src/gcc/ddg.c +++ b/src/gcc/ddg.c -@@ -145,6 +145,27 @@ +@@ -145,6 +145,45 @@ return rtx_mem_access_p (PATTERN (insn)); } @@ -15748,10 +15399,28 @@ + return false; +} + ++/* Return true if one of the definitions in INSN has MODE_CC. Otherwise ++ return false. */ ++static bool ++def_has_ccmode_p (rtx insn) ++{ ++ df_ref *def; ++ ++ for (def = DF_INSN_DEFS (insn); *def; def++) ++ { ++ enum machine_mode mode = GET_MODE (DF_REF_REG (*def)); ++ ++ if (GET_MODE_CLASS (mode) == MODE_CC) ++ return true; ++ } ++ ++ return false; ++} ++ /* Computes the dependence parameters (latency, distance etc.), creates a ddg_edge and adds it to the given DDG. */ static void -@@ -173,10 +194,15 @@ +@@ -173,10 +212,16 @@ compensate for that by generating reg-moves based on the life-range analysis. 
The anti-deps that will be deleted are the ones which have true-deps edges in the opposite direction (in other words @@ -15766,11 +15435,12 @@ - if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP)) + if (flag_modulo_sched_allow_regmoves + && (t == ANTI_DEP && dt == REG_DEP) ++ && !def_has_ccmode_p (dest_node->insn) + && !autoinc_var_is_used_p (dest_node->insn, src_node->insn)) { rtx set; -@@ -301,8 +327,15 @@ +@@ -301,8 +346,16 @@ gcc_assert (first_def_node); @@ -15783,11 +15453,12 @@ - || !flag_modulo_sched_allow_regmoves) + || !flag_modulo_sched_allow_regmoves + || JUMP_P (use_node->insn) -+ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)) ++ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn) ++ || def_has_ccmode_p (DF_REF_INSN (last_def))) create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP, REG_DEP, 1); -@@ -385,6 +418,33 @@ +@@ -385,6 +438,33 @@ &PATTERN (insn2)); } @@ -15821,7 +15492,7 @@ /* Given two nodes, analyze their RTL insns and add inter-loop mem deps to ddg G. */ static void -@@ -472,10 +532,22 @@ +@@ -472,10 +552,22 @@ if (DEBUG_INSN_P (j_node->insn)) continue; if (mem_access_insn_p (j_node->insn)) @@ -15846,7 +15517,7 @@ } } } -@@ -1011,6 +1083,7 @@ +@@ -1011,6 +1103,7 @@ for (i = 0; i < all_sccs->num_sccs; i++) free_scc (all_sccs->sccs[i]); @@ -15874,58 +15545,6 @@ && !df_ignore_stack_reg (mws->start_regno)) { bool really_add_notes = debug_insn != 0; ---- a/src/gcc/doc/tm.texi.in -+++ b/src/gcc/doc/tm.texi.in -@@ -2521,7 +2521,7 @@ - register, so @code{TARGET_PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when - @var{x} is a floating-point constant. If the constant can't be loaded - into any kind of register, code generation will be better if --@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead -+@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead - of using @code{TARGET_PREFERRED_RELOAD_CLASS}. 
- - If an insn has pseudos in it after register allocation, reload will go -@@ -2558,8 +2558,8 @@ - register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when - @var{x} is a floating-point constant. If the constant can't be loaded - into any kind of register, code generation will be better if --@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead --of using @code{PREFERRED_RELOAD_CLASS}. -+@code{TARGET_LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead -+of using @code{TARGET_PREFERRED_RELOAD_CLASS}. - - If an insn has pseudos in it after register allocation, reload will go - through the alternatives and call repeatedly @code{PREFERRED_RELOAD_CLASS} -@@ -4305,6 +4305,8 @@ - must have move patterns for this mode. - @end deftypefn - -+@hook TARGET_ARRAY_MODE_SUPPORTED_P -+ - @hook TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P - Define this to return nonzero for machine modes for which the port has - small register classes. If this target hook returns nonzero for a given -@@ -5555,13 +5557,13 @@ - @code{TARGET_MODE_DEPENDENT_ADDRESS_P} target hook. - @end defmac - --@defmac LEGITIMATE_CONSTANT_P (@var{x}) --A C expression that is nonzero if @var{x} is a legitimate constant for --an immediate operand on the target machine. You can assume that --@var{x} satisfies @code{CONSTANT_P}, so you need not check this. In fact, --@samp{1} is a suitable definition for this macro on machines where --anything @code{CONSTANT_P} is valid. --@end defmac -+@hook TARGET_LEGITIMATE_CONSTANT_P -+This hook returns true if @var{x} is a legitimate constant for a -+@var{mode}-mode immediate operand on the target machine. You can assume that -+@var{x} satisfies @code{CONSTANT_P}, so you need not check this. -+ -+The default definition returns true. 
-+@end deftypefn - - @hook TARGET_DELEGITIMIZE_ADDRESS - This hook is used to undo the possibly obfuscating effects of the --- a/src/gcc/dojump.c +++ b/src/gcc/dojump.c @@ -36,6 +36,7 @@ @@ -15936,273 +15555,6 @@ static bool prefer_and_bit_test (enum machine_mode, int); static void do_jump_by_parts_greater (tree, tree, int, rtx, rtx, int); ---- a/src/gcc/dwarf2out.c -+++ b/src/gcc/dwarf2out.c -@@ -4431,6 +4431,11 @@ - const char *section; /* Section this loclist is relative to */ - dw_loc_descr_ref expr; - hashval_t hash; -+ /* True if all addresses in this and subsequent lists are known to be -+ resolved. */ -+ bool resolved_addr; -+ /* True if this list has been replaced by dw_loc_next. */ -+ bool replaced; - bool emitted; - } dw_loc_list_node; - -@@ -6091,6 +6096,19 @@ - /* Table of decl location linked lists. */ - static GTY ((param_is (var_loc_list))) htab_t decl_loc_table; - -+/* A cached location list. */ -+struct GTY (()) cached_dw_loc_list_def { -+ /* The DECL_UID of the decl that this entry describes. */ -+ unsigned int decl_id; -+ -+ /* The cached location list. */ -+ dw_loc_list_ref loc_list; -+}; -+typedef struct cached_dw_loc_list_def cached_dw_loc_list; -+ -+/* Table of cached location lists. */ -+static GTY ((param_is (cached_dw_loc_list))) htab_t cached_dw_loc_list_table; -+ - /* A pointer to the base of a list of references to DIE's that - are uniquely identified by their tag, presence/absence of - children DIE's, and list of attribute/value pairs. 
*/ -@@ -6439,7 +6457,7 @@ - static void insert_double (double_int, unsigned char *); - static void insert_float (const_rtx, unsigned char *); - static rtx rtl_for_decl_location (tree); --static bool add_location_or_const_value_attribute (dw_die_ref, tree, -+static bool add_location_or_const_value_attribute (dw_die_ref, tree, bool, - enum dwarf_attribute); - static bool tree_add_const_value_attribute (dw_die_ref, tree); - static bool tree_add_const_value_attribute_for_decl (dw_die_ref, tree); -@@ -8173,6 +8191,24 @@ - htab_find_with_hash (decl_loc_table, decl, DECL_UID (decl)); - } - -+/* Returns a hash value for X (which really is a cached_dw_loc_list_list). */ -+ -+static hashval_t -+cached_dw_loc_list_table_hash (const void *x) -+{ -+ return (hashval_t) ((const cached_dw_loc_list *) x)->decl_id; -+} -+ -+/* Return nonzero if decl_id of cached_dw_loc_list X is the same as -+ UID of decl *Y. */ -+ -+static int -+cached_dw_loc_list_table_eq (const void *x, const void *y) -+{ -+ return (((const cached_dw_loc_list *) x)->decl_id -+ == DECL_UID ((const_tree) y)); -+} -+ - /* Equate a DIE to a particular declaration. */ - - static void -@@ -16995,15 +17031,22 @@ - these things can crop up in other ways also.) Note that one type of - constant value which can be passed into an inlined function is a constant - pointer. This can happen for example if an actual argument in an inlined -- function call evaluates to a compile-time constant address. */ -+ function call evaluates to a compile-time constant address. -+ -+ CACHE_P is true if it is worth caching the location list for DECL, -+ so that future calls can reuse it rather than regenerate it from scratch. -+ This is true for BLOCK_NONLOCALIZED_VARS in inlined subroutines, -+ since we will need to refer to them each time the function is inlined. 
*/ - - static bool --add_location_or_const_value_attribute (dw_die_ref die, tree decl, -+add_location_or_const_value_attribute (dw_die_ref die, tree decl, bool cache_p, - enum dwarf_attribute attr) - { - rtx rtl; - dw_loc_list_ref list; - var_loc_list *loc_list; -+ cached_dw_loc_list *cache; -+ void **slot; - - if (TREE_CODE (decl) == ERROR_MARK) - return false; -@@ -17040,7 +17083,33 @@ - && add_const_value_attribute (die, rtl)) - return true; - } -- list = loc_list_from_tree (decl, decl_by_reference_p (decl) ? 0 : 2); -+ /* If this decl is from BLOCK_NONLOCALIZED_VARS, we might need its -+ list several times. See if we've already cached the contents. */ -+ list = NULL; -+ if (loc_list == NULL || cached_dw_loc_list_table == NULL) -+ cache_p = false; -+ if (cache_p) -+ { -+ cache = (cached_dw_loc_list *) -+ htab_find_with_hash (cached_dw_loc_list_table, decl, DECL_UID (decl)); -+ if (cache) -+ list = cache->loc_list; -+ } -+ if (list == NULL) -+ { -+ list = loc_list_from_tree (decl, decl_by_reference_p (decl) ? 0 : 2); -+ /* It is usually worth caching this result if the decl is from -+ BLOCK_NONLOCALIZED_VARS and if the list has at least two elements. */ -+ if (cache_p && list && list->dw_loc_next) -+ { -+ slot = htab_find_slot_with_hash (cached_dw_loc_list_table, decl, -+ DECL_UID (decl), INSERT); -+ cache = ggc_alloc_cleared_cached_dw_loc_list (); -+ cache->decl_id = DECL_UID (decl); -+ cache->loc_list = list; -+ *slot = cache; -+ } -+ } - if (list) - { - add_AT_location_description (die, attr, list); -@@ -18738,7 +18807,7 @@ - equate_decl_number_to_die (node, parm_die); - if (! DECL_ABSTRACT (node_or_origin)) - add_location_or_const_value_attribute (parm_die, node_or_origin, -- DW_AT_location); -+ node == NULL, DW_AT_location); - - break; - -@@ -18923,6 +18992,7 @@ - tree context; - int was_abstract; - htab_t old_decl_loc_table; -+ htab_t old_cached_dw_loc_list_table; - - /* Make sure we have the actual abstract inline, not a clone. 
*/ - decl = DECL_ORIGIN (decl); -@@ -18937,6 +19007,8 @@ - get locations in abstract instantces. */ - old_decl_loc_table = decl_loc_table; - decl_loc_table = NULL; -+ old_cached_dw_loc_list_table = cached_dw_loc_list_table; -+ cached_dw_loc_list_table = NULL; - - /* Be sure we've emitted the in-class declaration DIE (if any) first, so - we don't get confused by DECL_ABSTRACT. */ -@@ -18961,6 +19033,7 @@ - - current_function_decl = save_fn; - decl_loc_table = old_decl_loc_table; -+ cached_dw_loc_list_table = old_cached_dw_loc_list_table; - pop_cfun (); - } - -@@ -19745,9 +19818,8 @@ - && !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl_or_origin))) - defer_location (decl_or_origin, var_die); - else -- add_location_or_const_value_attribute (var_die, -- decl_or_origin, -- DW_AT_location); -+ add_location_or_const_value_attribute (var_die, decl_or_origin, -+ decl == NULL, DW_AT_location); - add_pubname (decl_or_origin, var_die); - } - else -@@ -21534,6 +21606,7 @@ - dwarf2out_decl (decl); - - htab_empty (decl_loc_table); -+ htab_empty (cached_dw_loc_list_table); - } - - /* Output a marker (i.e. a label) for the beginning of the generated code for -@@ -22267,6 +22340,11 @@ - decl_loc_table = htab_create_ggc (10, decl_loc_table_hash, - decl_loc_table_eq, NULL); - -+ /* Allocate the cached_dw_loc_list_table. */ -+ cached_dw_loc_list_table -+ = htab_create_ggc (10, cached_dw_loc_list_table_hash, -+ cached_dw_loc_list_table_eq, NULL); -+ - /* Allocate the initial hunk of the decl_scope_table. 
*/ - decl_scope_table = VEC_alloc (tree, gc, 256); - -@@ -22907,30 +22985,53 @@ - { - dw_die_ref c; - dw_attr_ref a; -- dw_loc_list_ref *curr; -+ dw_loc_list_ref *curr, *start, loc; - unsigned ix; - - FOR_EACH_VEC_ELT (dw_attr_node, die->die_attr, ix, a) - switch (AT_class (a)) - { - case dw_val_class_loc_list: -- curr = AT_loc_list_ptr (a); -- while (*curr) -+ start = curr = AT_loc_list_ptr (a); -+ loc = *curr; -+ gcc_assert (loc); -+ /* The same list can be referenced more than once. See if we have -+ already recorded the result from a previous pass. */ -+ if (loc->replaced) -+ *curr = loc->dw_loc_next; -+ else if (!loc->resolved_addr) - { -- if (!resolve_addr_in_expr ((*curr)->expr)) -+ /* As things stand, we do not expect or allow one die to -+ reference a suffix of another die's location list chain. -+ References must be identical or completely separate. -+ There is therefore no need to cache the result of this -+ pass on any list other than the first; doing so -+ would lead to unnecessary writes. 
*/ -+ while (*curr) - { -- dw_loc_list_ref next = (*curr)->dw_loc_next; -- if (next && (*curr)->ll_symbol) -+ gcc_assert (!(*curr)->replaced && !(*curr)->resolved_addr); -+ if (!resolve_addr_in_expr ((*curr)->expr)) - { -- gcc_assert (!next->ll_symbol); -- next->ll_symbol = (*curr)->ll_symbol; -+ dw_loc_list_ref next = (*curr)->dw_loc_next; -+ if (next && (*curr)->ll_symbol) -+ { -+ gcc_assert (!next->ll_symbol); -+ next->ll_symbol = (*curr)->ll_symbol; -+ } -+ *curr = next; - } -- *curr = next; -+ else -+ curr = &(*curr)->dw_loc_next; - } -+ if (loc == *start) -+ loc->resolved_addr = 1; - else -- curr = &(*curr)->dw_loc_next; -+ { -+ loc->replaced = 1; -+ loc->dw_loc_next = *start; -+ } - } -- if (!AT_loc_list (a)) -+ if (!*start) - { - remove_AT (die, a->dw_attr); - ix--; -@@ -23359,6 +23460,7 @@ - add_location_or_const_value_attribute ( - VEC_index (deferred_locations, deferred_locations_list, i)->die, - VEC_index (deferred_locations, deferred_locations_list, i)->variable, -+ false, - DW_AT_location); - } - --- a/src/gcc/expmed.c +++ b/src/gcc/expmed.c @@ -657,6 +657,10 @@ @@ -16294,7 +15646,7 @@ x = validize_mem (force_const_mem (mode, x)); /* See if the machine can do this with a load multiple insn. */ -@@ -2308,7 +2308,7 @@ +@@ -2413,7 +2413,7 @@ offset -= size; cst = (*constfun) (constfundata, offset, mode); @@ -16303,7 +15655,7 @@ return 0; if (!reverse) -@@ -3363,7 +3363,7 @@ +@@ -3468,7 +3468,7 @@ y_cst = y; @@ -16312,7 +15664,7 @@ { y = force_const_mem (mode, y); -@@ -3419,7 +3419,7 @@ +@@ -3524,7 +3524,7 @@ REAL_VALUE_FROM_CONST_DOUBLE (r, y); @@ -16321,7 +15673,7 @@ oldcost = rtx_cost (y, SET, speed); else oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed); -@@ -3442,7 +3442,7 @@ +@@ -3547,7 +3547,7 @@ trunc_y = CONST_DOUBLE_FROM_REAL_VALUE (r, srcmode); @@ -16330,7 +15682,7 @@ { /* Skip if the target needs extra instructions to perform the extension. */ -@@ -3855,7 +3855,7 @@ +@@ -3960,7 +3960,7 @@ by setting SKIP to 0. 
*/ skip = (reg_parm_stack_space == 0) ? 0 : not_stack; @@ -16339,7 +15691,7 @@ x = validize_mem (force_const_mem (mode, x)); /* If X is a hard register in a non-integer mode, copy it into a pseudo; -@@ -4866,16 +4866,136 @@ +@@ -4976,16 +4976,136 @@ return NULL_RTX; } @@ -16480,7 +15832,7 @@ /* Whether CTOR is a valid constant initializer, in accordance with what initializer_constant_valid_p does. If inferred from the constructor -@@ -4884,7 +5004,9 @@ +@@ -4994,7 +5114,9 @@ bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor); nz_elts = 0; @@ -16491,7 +15843,7 @@ FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, purpose, value) { -@@ -4899,6 +5021,8 @@ +@@ -5009,6 +5131,8 @@ mult = (tree_low_cst (hi_index, 1) - tree_low_cst (lo_index, 1) + 1); } @@ -16500,7 +15852,7 @@ switch (TREE_CODE (value)) { -@@ -4906,11 +5030,11 @@ +@@ -5016,11 +5140,11 @@ { HOST_WIDE_INT nz = 0, ic = 0; @@ -16515,7 +15867,7 @@ if (const_from_elts_p && const_p) const_p = const_elt_p; -@@ -4922,12 +5046,12 @@ +@@ -5032,12 +5156,12 @@ case FIXED_CST: if (!initializer_zerop (value)) nz_elts += mult; @@ -16530,7 +15882,7 @@ break; case COMPLEX_CST: -@@ -4935,7 +5059,7 @@ +@@ -5045,7 +5169,7 @@ nz_elts += mult; if (!initializer_zerop (TREE_IMAGPART (value))) nz_elts += mult; @@ -16539,7 +15891,7 @@ break; case VECTOR_CST: -@@ -4945,65 +5069,31 @@ +@@ -5055,65 +5179,31 @@ { if (!initializer_zerop (TREE_VALUE (v))) nz_elts += mult; @@ -16613,7 +15965,7 @@ return const_p; } -@@ -5013,111 +5103,50 @@ +@@ -5123,111 +5213,50 @@ and place it in *P_NZ_ELTS; * how many scalar fields in total are in CTOR, and place it in *P_ELT_COUNT. @@ -16752,7 +16104,7 @@ } /* Return 1 if EXP contains mostly (3/4) zeros. 
*/ -@@ -5126,18 +5155,12 @@ +@@ -5236,18 +5265,12 @@ mostly_zeros_p (const_tree exp) { if (TREE_CODE (exp) == CONSTRUCTOR) @@ -16775,7 +16127,7 @@ } return initializer_zerop (exp); -@@ -5149,12 +5172,11 @@ +@@ -5259,12 +5282,11 @@ all_zeros_p (const_tree exp) { if (TREE_CODE (exp) == CONSTRUCTOR) @@ -16791,7 +16143,7 @@ return nz_elts == 0; } -@@ -7666,18 +7688,16 @@ +@@ -7805,18 +7827,16 @@ { enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0)); this_optab = usmul_widen_optab; @@ -16819,7 +16171,7 @@ } } /* Check for a multiplication with matching signedness. */ -@@ -7692,10 +7712,10 @@ +@@ -7831,10 +7851,10 @@ optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab; this_optab = zextend_p ? umul_widen_optab : smul_widen_optab; @@ -16833,7 +16185,7 @@ { expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); -@@ -7703,7 +7723,8 @@ +@@ -7842,7 +7862,8 @@ unsignedp, this_optab); return REDUCE_BIT_FIELD (temp); } @@ -16843,7 +16195,7 @@ && innermode == word_mode) { rtx htem, hipart; -@@ -8269,6 +8290,19 @@ +@@ -8456,6 +8477,19 @@ return target; } @@ -16863,7 +16215,7 @@ case VEC_PACK_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: case VEC_PACK_FIX_TRUNC_EXPR: -@@ -8545,10 +8579,13 @@ +@@ -8737,10 +8771,13 @@ if (code == SSA_NAME && (g = SSA_NAME_DEF_STMT (ssa_name)) && gimple_code (g) == GIMPLE_CALL) @@ -16881,7 +16233,7 @@ else pmode = promote_decl_mode (exp, &unsignedp); gcc_assert (GET_MODE (decl_rtl) == pmode); -@@ -9108,7 +9145,7 @@ +@@ -9301,7 +9338,7 @@ constant and we don't need a memory reference. */ if (CONSTANT_P (op0) && mode2 != BLKmode @@ -16890,9 +16242,26 @@ && !must_force_mem) op0 = force_reg (mode2, op0); +--- a/src/gcc/flag-types.h ++++ b/src/gcc/flag-types.h +@@ -106,6 +106,14 @@ + }; + #endif + ++/* The algorithm used to implement -fsched-pressure. 
*/ ++enum sched_pressure_algorithm ++{ ++ SCHED_PRESSURE_NONE, ++ SCHED_PRESSURE_WEIGHTED, ++ SCHED_PRESSURE_MODEL ++}; ++ + /* The algorithm used for the integrated register allocator (IRA). */ + enum ira_algorithm + { --- a/src/gcc/fold-const.c +++ b/src/gcc/fold-const.c -@@ -9232,15 +9232,10 @@ +@@ -9239,15 +9239,10 @@ 0 <= N < M as is common. In general, the precise value of P is unknown. M is chosen as large as possible such that constant N can be determined. @@ -16910,7 +16279,7 @@ { enum tree_code code; -@@ -9270,9 +9265,8 @@ +@@ -9277,9 +9272,8 @@ } } @@ -16922,7 +16291,7 @@ } else if (code == POINTER_PLUS_EXPR) { -@@ -9282,8 +9276,7 @@ +@@ -9289,8 +9283,7 @@ op0 = TREE_OPERAND (expr, 0); STRIP_NOPS (op0); @@ -16932,7 +16301,7 @@ op1 = TREE_OPERAND (expr, 1); STRIP_NOPS (op1); -@@ -11163,8 +11156,7 @@ +@@ -11154,8 +11147,7 @@ unsigned HOST_WIDE_INT modulus, residue; unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg1); @@ -16942,88 +16311,6 @@ /* This works because modulus is a power of 2. If this weren't the case, we'd have to replace it by its greatest power-of-2 ---- a/src/gcc/fortran/ChangeLog -+++ b/src/gcc/fortran/ChangeLog -@@ -1,3 +1,18 @@ -+2011-11-25 Tobias Burnus -+ -+ PR fortran/50408 -+ * trans-decl.c (gfc_get_module_backend_decl): Also copy -+ ts.u.derived from the gsym if the ts.type is BT_CLASS. -+ (gfc_get_extern_function_decl): Copy also the backend_decl -+ for the symbol's ts.u.{derived,cl} from the gsym. -+ * trans-types.c (gfc_copy_dt_decls_ifequal): Directly -+ return if "from" and "to" are the same. -+ -+2011-11-24 Tobias Burnus -+ -+ PR fortran/51218 -+ * gfortran.dg/implicit_pure_1.f90: New. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/gcc/fortran/resolve.c -+++ b/src/gcc/fortran/resolve.c -@@ -3132,10 +3132,10 @@ - "procedure within a PURE procedure", name, &expr->where); - t = FAILURE; - } -- } - -- if (!pure_function (expr, &name) && name && gfc_implicit_pure (NULL)) -- gfc_current_ns->proc_name->attr.implicit_pure = 0; -+ if (gfc_implicit_pure (NULL)) -+ gfc_current_ns->proc_name->attr.implicit_pure = 0; -+ } - - /* Functions without the RECURSIVE attribution are not allowed to - * call themselves. */ -@@ -3195,6 +3195,9 @@ - else if (gfc_pure (NULL)) - gfc_error ("Subroutine call to '%s' at %L is not PURE", sym->name, - &c->loc); -+ -+ if (gfc_implicit_pure (NULL)) -+ gfc_current_ns->proc_name->attr.implicit_pure = 0; - } - - ---- a/src/gcc/fortran/trans-decl.c -+++ b/src/gcc/fortran/trans-decl.c -@@ -677,7 +677,7 @@ - } - else if (s->backend_decl) - { -- if (sym->ts.type == BT_DERIVED) -+ if (sym->ts.type == BT_DERIVED || sym->ts.type == BT_CLASS) - gfc_copy_dt_decls_ifequal (s->ts.u.derived, sym->ts.u.derived, - true); - else if (sym->ts.type == BT_CHARACTER) -@@ -1602,6 +1602,11 @@ - gfc_find_symbol (sym->name, gsym->ns, 0, &s); - if (s && s->backend_decl) - { -+ if (sym->ts.type == BT_DERIVED || sym->ts.type == BT_CLASS) -+ gfc_copy_dt_decls_ifequal (s->ts.u.derived, sym->ts.u.derived, -+ true); -+ else if (sym->ts.type == BT_CHARACTER) -+ sym->ts.u.cl->backend_decl = s->ts.u.cl->backend_decl; - sym->backend_decl = s->backend_decl; - return sym->backend_decl; - } ---- a/src/gcc/fortran/trans-types.c -+++ b/src/gcc/fortran/trans-types.c -@@ -2092,6 +2092,9 @@ - gfc_component *to_cm; - gfc_component *from_cm; - -+ if (from == to) -+ return 1; -+ - if (from->backend_decl == NULL - || !gfc_compare_derived_types (from, to)) - return 0; --- a/src/gcc/gengtype-lex.c +++ b/src/gcc/gengtype-lex.c @@ -55,7 +55,6 @@ @@ -17063,8 +16350,8 @@ #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET char *yytext; --#line 1 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 1 
"/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 1 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 1 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" /* -*- indented-text -*- */ /* Process source files and output type information. Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010 @@ -17072,8 +16359,8 @@ along with GCC; see the file COPYING3. If not see . */ #define YY_NO_INPUT 1 --#line 25 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 25 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 25 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 25 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" #include "bconfig.h" #include "system.h" @@ -17112,8 +16399,8 @@ register char *yy_cp, *yy_bp; register int yy_act; --#line 59 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 59 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 59 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 59 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" /* Do this on entry to yylex(): */ *yylval = 0; @@ -17130,8 +16417,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 70 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 70 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 70 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 70 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); return TYPEDEF; @@ -17139,8 +16426,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 74 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 74 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 74 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 74 
"/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); return STRUCT; @@ -17148,8 +16435,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 78 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 78 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 78 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 78 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); return UNION; @@ -17157,8 +16444,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 82 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 82 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 82 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 82 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); return EXTERN; @@ -17166,8 +16453,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 86 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 86 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 86 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 86 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); return STATIC; @@ -17175,8 +16462,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 91 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 91 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 91 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 91 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); return DEFVEC_OP; @@ -17184,8 +16471,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 95 
"/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 95 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 95 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 95 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); return DEFVEC_I; @@ -17193,8 +16480,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 99 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 99 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 99 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 99 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); return DEFVEC_ALLOC; @@ -17202,22 +16489,22 @@ case 9: YY_RULE_SETUP --#line 107 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 107 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 107 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 107 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct_comment); } YY_BREAK case 10: /* rule 10 can match eol */ YY_RULE_SETUP --#line 109 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 109 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 109 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 109 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { update_lineno (yytext, yyleng); } YY_BREAK case 11: /* rule 11 can match eol */ YY_RULE_SETUP --#line 110 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 110 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 110 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 110 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { lexer_line.line++; } YY_BREAK case 12: @@ -17225,8 +16512,8 @@ (yy_c_buf_p) = yy_cp = yy_bp + 5; YY_DO_BEFORE_ACTION; /* 
set up yytext again */ YY_RULE_SETUP --#line 112 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 112 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 112 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 112 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" /* don't care */ YY_BREAK case 13: @@ -17234,8 +16521,8 @@ (yy_c_buf_p) = yy_cp = yy_bp + 3; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 113 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 113 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 113 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 113 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return GTY_TOKEN; } YY_BREAK case 14: @@ -17243,8 +16530,8 @@ (yy_c_buf_p) = yy_cp = yy_bp + 3; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 114 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 114 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 114 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 114 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return VEC_TOKEN; } YY_BREAK case 15: @@ -17252,8 +16539,8 @@ (yy_c_buf_p) = yy_cp = yy_bp + 5; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 115 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 115 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 115 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 115 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return UNION; } YY_BREAK case 16: @@ -17261,8 +16548,8 @@ (yy_c_buf_p) = yy_cp = yy_bp + 6; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 116 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 116 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 116 
"/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 116 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return STRUCT; } YY_BREAK case 17: @@ -17270,8 +16557,8 @@ (yy_c_buf_p) = yy_cp = yy_bp + 4; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 117 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 117 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 117 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 117 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return ENUM; } YY_BREAK case 18: @@ -17279,8 +16566,8 @@ (yy_c_buf_p) = yy_cp = yy_bp + 9; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 118 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 118 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 118 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 118 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return PTR_ALIAS; } YY_BREAK case 19: @@ -17288,14 +16575,14 @@ (yy_c_buf_p) = yy_cp = yy_bp + 10; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 119 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 119 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 119 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 119 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return NESTED_PTR; } YY_BREAK case 20: YY_RULE_SETUP --#line 120 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 120 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 120 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 120 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return NUM; } YY_BREAK case 21: @@ -17303,8 +16590,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 
121 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 121 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 121 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 121 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { *yylval = XDUPVAR (const char, yytext, yyleng, yyleng+1); return PARAM_IS; @@ -17312,13 +16599,13 @@ *yy_cp = (yy_hold_char); /* undo effects of setting up yytext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ --#line 127 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 127 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 127 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 127 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" case 23: /* rule 23 can match eol */ YY_RULE_SETUP --#line 127 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 127 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 127 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 127 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { size_t len; @@ -17326,8 +16613,8 @@ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 139 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 139 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 139 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 139 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { *yylval = XDUPVAR (const char, yytext, yyleng, yyleng+1); return ID; @@ -17335,8 +16622,8 @@ case 25: /* rule 25 can match eol */ YY_RULE_SETUP --#line 144 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 144 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 144 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 144 
"/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng-1); return STRING; @@ -17344,8 +16631,8 @@ case 26: /* rule 26 can match eol */ YY_RULE_SETUP --#line 149 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 149 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 149 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 149 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng-1); return ARRAY; @@ -17353,8 +16640,8 @@ case 27: /* rule 27 can match eol */ YY_RULE_SETUP --#line 153 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 153 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 153 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 153 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng); return CHAR; @@ -17362,28 +16649,28 @@ YY_BREAK case 28: YY_RULE_SETUP --#line 158 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 158 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 158 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 158 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return ELLIPSIS; } YY_BREAK case 29: YY_RULE_SETUP --#line 159 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 159 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 159 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 159 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { return yytext[0]; } YY_BREAK /* ignore pp-directives */ case 30: /* rule 30 can match eol */ YY_RULE_SETUP --#line 162 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 162 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" 
+-#line 162 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 162 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" {lexer_line.line++;} YY_BREAK case 31: YY_RULE_SETUP --#line 164 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 164 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 164 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 164 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { error_at_line (&lexer_line, "unexpected character `%s'", yytext); } @@ -17391,36 +16678,36 @@ case 32: YY_RULE_SETUP --#line 169 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 169 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 169 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 169 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_comment); } YY_BREAK case 33: /* rule 33 can match eol */ YY_RULE_SETUP --#line 170 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 170 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 170 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 170 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { lexer_line.line++; } YY_BREAK case 34: --#line 172 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 172 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 172 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 172 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" case 35: /* rule 35 can match eol */ --#line 173 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 173 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 173 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 173 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" case 36: /* rule 36 can match eol */ 
YY_RULE_SETUP --#line 173 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 173 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 173 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 173 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" /* do nothing */ YY_BREAK case 37: /* rule 37 can match eol */ YY_RULE_SETUP --#line 174 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 174 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 174 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 174 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { update_lineno (yytext, yyleng); } YY_BREAK case 38: @@ -17428,25 +16715,25 @@ (yy_c_buf_p) = yy_cp = yy_bp + 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 175 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 175 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 175 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 175 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" /* do nothing */ YY_BREAK case 39: /* rule 39 can match eol */ YY_RULE_SETUP --#line 178 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 178 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 178 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 178 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { lexer_line.line++; } YY_BREAK case 40: --#line 180 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 180 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 180 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 180 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" case 41: YY_RULE_SETUP --#line 180 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 180 
"/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 180 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 180 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" /* do nothing */ YY_BREAK case 42: @@ -17454,30 +16741,30 @@ (yy_c_buf_p) = yy_cp = yy_bp + 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP --#line 181 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 181 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 181 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 181 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" /* do nothing */ YY_BREAK case 43: YY_RULE_SETUP --#line 183 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 183 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 183 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 183 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(INITIAL); } YY_BREAK case 44: YY_RULE_SETUP --#line 184 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 184 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 184 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 184 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { BEGIN(in_struct); } YY_BREAK case 45: --#line 187 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 187 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 187 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 187 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" case 46: YY_RULE_SETUP --#line 187 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 187 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 187 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 187 
"/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" { error_at_line (&lexer_line, "unterminated comment or string; unexpected EOF"); @@ -17485,14 +16772,14 @@ case 47: /* rule 47 can match eol */ YY_RULE_SETUP --#line 192 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 192 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 192 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 192 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" /* do nothing */ YY_BREAK case 48: YY_RULE_SETUP --#line 194 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 194 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 194 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 194 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" YY_FATAL_ERROR( "flex scanner jammed" ); YY_BREAK -#line 1650 "gengtype-lex.c" @@ -17515,8 +16802,8 @@ #define YYTABLES_NAME "yytables" --#line 194 "/d/gcc-4.6.2/gcc-4.6.2/gcc/gengtype-lex.l" -+#line 194 "/home/ams/tmp/linaro/gcc-4.6/gcc-linaro-4.6-2011.12/gcc/gengtype-lex.l" +-#line 194 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l" ++#line 194 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l" @@ -17628,57 +16915,6 @@ break; } ---- a/src/gcc/gimple-low.c -+++ b/src/gcc/gimple-low.c -@@ -218,6 +218,10 @@ - tree fndecl, parms, p; - unsigned int i, nargs; - -+ /* Calls to internal functions always match their signature. */ -+ if (gimple_call_internal_p (stmt)) -+ return true; -+ - nargs = gimple_call_num_args (stmt); - - /* Get argument types for verification. 
*/ ---- a/src/gcc/gimple-pretty-print.c -+++ b/src/gcc/gimple-pretty-print.c -@@ -343,6 +343,8 @@ - case VEC_EXTRACT_ODD_EXPR: - case VEC_INTERLEAVE_HIGH_EXPR: - case VEC_INTERLEAVE_LOW_EXPR: -+ case VEC_WIDEN_LSHIFT_HI_EXPR: -+ case VEC_WIDEN_LSHIFT_LO_EXPR: - for (p = tree_code_name [(int) code]; *p; p++) - pp_character (buffer, TOUPPER (*p)); - pp_string (buffer, " <"); -@@ -596,8 +598,12 @@ - - if (flags & TDF_RAW) - { -- dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T", -- gs, gimple_call_fn (gs), lhs); -+ if (gimple_call_internal_p (gs)) -+ dump_gimple_fmt (buffer, spc, flags, "%G <%s, %T", gs, -+ internal_fn_name (gimple_call_internal_fn (gs)), lhs); -+ else -+ dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T", -+ gs, gimple_call_fn (gs), lhs); - if (gimple_call_num_args (gs) > 0) - { - pp_string (buffer, ", "); -@@ -617,7 +623,10 @@ - - pp_space (buffer); - } -- print_call_name (buffer, gimple_call_fn (gs), flags); -+ if (gimple_call_internal_p (gs)) -+ pp_string (buffer, internal_fn_name (gimple_call_internal_fn (gs))); -+ else -+ print_call_name (buffer, gimple_call_fn (gs), flags); - pp_string (buffer, " ("); - dump_gimple_call_args (buffer, gs, flags); - pp_character (buffer, ')'); --- a/src/gcc/gimple.c +++ b/src/gcc/gimple.c @@ -276,6 +276,59 @@ @@ -17830,25 +17066,7 @@ return 0; switch (TREE_STRING_POINTER (attr)[0]) -@@ -2293,6 +2374,7 @@ - if (is_gimple_call (s)) - { - unsigned nargs = gimple_call_num_args (s); -+ tree fn; - - if (!(gimple_call_flags (s) & (ECF_CONST | ECF_PURE))) - return true; -@@ -2307,7 +2389,8 @@ - return true; - } - -- if (TREE_SIDE_EFFECTS (gimple_call_fn (s))) -+ fn = gimple_call_fn (s); -+ if (fn && TREE_SIDE_EFFECTS (fn)) - return true; - - for (i = 0; i < nargs; i++) -@@ -2349,14 +2432,15 @@ +@@ -2317,14 +2398,15 @@ if (is_gimple_call (s)) { unsigned nargs = gimple_call_num_args (s); @@ -17866,7 +17084,7 @@ { gcc_assert (gimple_has_volatile_ops (s)); return true; -@@ -3113,7 +3197,6 @@ +@@ -3081,7 +3163,6 @@ 
gimple_call_copy_skip_args (gimple stmt, bitmap args_to_skip) { int i; @@ -17874,7 +17092,7 @@ int nargs = gimple_call_num_args (stmt); VEC(tree, heap) *vargs = VEC_alloc (tree, heap, nargs); gimple new_stmt; -@@ -3122,7 +3205,11 @@ +@@ -3090,7 +3171,11 @@ if (!bitmap_bit_p (args_to_skip, i)) VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i)); @@ -18017,9 +17235,60 @@ /* See through the pointer. */ type = TREE_TYPE (type); +--- a/src/gcc/gimple-low.c ++++ b/src/gcc/gimple-low.c +@@ -218,6 +218,10 @@ + tree fndecl, parms, p; + unsigned int i, nargs; + ++ /* Calls to internal functions always match their signature. */ ++ if (gimple_call_internal_p (stmt)) ++ return true; ++ + nargs = gimple_call_num_args (stmt); + + /* Get argument types for verification. */ +--- a/src/gcc/gimple-pretty-print.c ++++ b/src/gcc/gimple-pretty-print.c +@@ -343,6 +343,8 @@ + case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + for (p = tree_code_name [(int) code]; *p; p++) + pp_character (buffer, TOUPPER (*p)); + pp_string (buffer, " <"); +@@ -596,8 +598,12 @@ + + if (flags & TDF_RAW) + { +- dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T", +- gs, gimple_call_fn (gs), lhs); ++ if (gimple_call_internal_p (gs)) ++ dump_gimple_fmt (buffer, spc, flags, "%G <%s, %T", gs, ++ internal_fn_name (gimple_call_internal_fn (gs)), lhs); ++ else ++ dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T", ++ gs, gimple_call_fn (gs), lhs); + if (gimple_call_num_args (gs) > 0) + { + pp_string (buffer, ", "); +@@ -617,7 +623,10 @@ + + pp_space (buffer); + } +- print_call_name (buffer, gimple_call_fn (gs), flags); ++ if (gimple_call_internal_p (gs)) ++ pp_string (buffer, internal_fn_name (gimple_call_internal_fn (gs))); ++ else ++ print_call_name (buffer, gimple_call_fn (gs), flags); + pp_string (buffer, " ("); + dump_gimple_call_args (buffer, gs, flags); + pp_character (buffer, ')'); --- 
a/src/gcc/gimplify.c +++ b/src/gcc/gimplify.c -@@ -3711,9 +3711,8 @@ +@@ -3712,9 +3712,8 @@ case ARRAY_TYPE: { struct gimplify_init_ctor_preeval_data preeval_data; @@ -18031,7 +17300,7 @@ /* Aggregate types must lower constructors to initialization of individual elements. The exception is that a CONSTRUCTOR node -@@ -3730,7 +3729,7 @@ +@@ -3731,7 +3730,7 @@ can only do so if it known to be a valid constant initializer. */ valid_const_initializer = categorize_ctor_elements (ctor, &num_nonzero_elements, @@ -18040,7 +17309,7 @@ /* If a const aggregate variable is being initialized, then it should never be a lose to promote the variable to be static. */ -@@ -3768,26 +3767,29 @@ +@@ -3769,26 +3768,29 @@ parts in, then generate code for the non-constant parts. */ /* TODO. There's code in cp/typeck.c to do this. */ @@ -18087,5986 +17356,6454 @@ /* If there are "lots" of initialized elements, and all of them are valid address constants, then the entire initializer can ---- a/src/gcc/hooks.c -+++ b/src/gcc/hooks.c -@@ -101,6 +101,15 @@ +--- a/src/gcc/graphite-sese-to-poly.c ++++ b/src/gcc/graphite-sese-to-poly.c +@@ -1721,7 +1721,7 @@ + + FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) + for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++) +- if (dr_may_alias_p (dr1, dr2)) ++ if (dr_may_alias_p (dr1, dr2, true)) + edge_num++; + + fprintf (file, "$\n"); +@@ -1733,7 +1733,7 @@ + + FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) + for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++) +- if (dr_may_alias_p (dr1, dr2)) ++ if (dr_may_alias_p (dr1, dr2, true)) + fprintf (file, "e %d %d\n", i + 1, j + 1); + return true; - } +@@ -1763,7 +1763,7 @@ -+/* Generic hook that takes (enum machine_mode, unsigned HOST_WIDE_INT) -+ and returns false. 
*/ -+bool -+hook_bool_mode_uhwi_false (enum machine_mode mode ATTRIBUTE_UNUSED, -+ unsigned HOST_WIDE_INT value ATTRIBUTE_UNUSED) + FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) + for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++) +- if (dr_may_alias_p (dr1, dr2)) ++ if (dr_may_alias_p (dr1, dr2, true)) + fprintf (file, "n%d n%d\n", i, j); + + return true; +@@ -1789,7 +1789,7 @@ + + FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) + for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++) +- if (dr_may_alias_p (dr1, dr2)) ++ if (dr_may_alias_p (dr1, dr2, true)) + fprintf (file, "%d %d\n", i, j); + + return true; +@@ -1825,7 +1825,7 @@ + + FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1) + for (j = i+1; VEC_iterate (data_reference_p, drs, j, dr2); j++) +- if (dr_may_alias_p (dr1, dr2)) ++ if (dr_may_alias_p (dr1, dr2, true)) + { + add_edge (g, i, j); + add_edge (g, j, i); +--- a/src/gcc/haifa-sched.c ++++ b/src/gcc/haifa-sched.c +@@ -348,6 +348,14 @@ + /* Create empty basic block after the specified block. */ + basic_block (* sched_create_empty_bb) (basic_block); + ++/* Return the number of cycles until INSN is expected to be ready. ++ Return zero if it already is. */ ++static int ++insn_delay (rtx insn) +{ -+ return false; ++ return MAX (INSN_TICK (insn) - clock_var, 0); +} + - /* Generic hook that takes (FILE *, const char *) and does nothing. 
*/ - void - hook_void_FILEptr_constcharptr (FILE *a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED) ---- a/src/gcc/hooks.h -+++ b/src/gcc/hooks.h -@@ -34,6 +34,8 @@ - extern bool hook_bool_mode_true (enum machine_mode); - extern bool hook_bool_mode_const_rtx_false (enum machine_mode, const_rtx); - extern bool hook_bool_mode_const_rtx_true (enum machine_mode, const_rtx); -+extern bool hook_bool_mode_uhwi_false (enum machine_mode, -+ unsigned HOST_WIDE_INT); - extern bool hook_bool_tree_false (tree); - extern bool hook_bool_const_tree_false (const_tree); - extern bool hook_bool_tree_true (tree); ---- a/src/gcc/ifcvt.c -+++ b/src/gcc/ifcvt.c -@@ -1,5 +1,6 @@ - /* If-conversion support. -- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010 -+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, -+ 2011 - Free Software Foundation, Inc. + static int + may_trap_exp (const_rtx x, int is_store) + { +@@ -571,10 +579,10 @@ - This file is part of GCC. -@@ -304,6 +305,10 @@ + /* Do register pressure sensitive insn scheduling if the flag is set + up. */ +-bool sched_pressure_p; ++enum sched_pressure_algorithm sched_pressure; - for (insn = start; ; insn = NEXT_INSN (insn)) + /* Map regno -> its cover class. The map defined only when +- SCHED_PRESSURE_P is true. */ ++ SCHED_PRESSURE != SCHED_PRESSURE_NONE. */ + enum reg_class *sched_regno_cover_class; + + /* The current register pressure. Only elements corresponding cover +@@ -602,10 +610,12 @@ + bitmap_clear (region_ref_regs); + } + +-/* Update current register pressure related info after birth (if +- BIRTH_P) or death of register REGNO. */ +-static void +-mark_regno_birth_or_death (int regno, bool birth_p) ++/* PRESSURE[CL] describes the pressure on register class CL. Update it ++ for the birth (if BIRTH_P) or death (if !BIRTH_P) of register REGNO. ++ LIVE tracks the set of live registers; if it is null, assume that ++ every birth or death is genuine. 
*/ ++static inline void ++mark_regno_birth_or_death (bitmap live, int *pressure, int regno, bool birth_p) + { + enum reg_class cover_class; + +@@ -616,15 +626,17 @@ + { + if (birth_p) + { +- bitmap_set_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class] +- += ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)]; ++ if (!live || bitmap_set_bit (live, regno)) ++ pressure[cover_class] ++ += (ira_reg_class_nregs ++ [cover_class][PSEUDO_REGNO_MODE (regno)]); + } + else + { +- bitmap_clear_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class] +- -= ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)]; ++ if (!live || bitmap_clear_bit (live, regno)) ++ pressure[cover_class] ++ -= (ira_reg_class_nregs ++ [cover_class][PSEUDO_REGNO_MODE (regno)]); + } + } + } +@@ -633,13 +645,13 @@ { -+ /* dwarf2out can't cope with conditional prologues. */ -+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END) -+ return FALSE; -+ - if (NOTE_P (insn) || DEBUG_INSN_P (insn)) - goto insn_done; + if (birth_p) + { +- bitmap_set_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class]++; ++ if (!live || bitmap_set_bit (live, regno)) ++ pressure[cover_class]++; + } + else + { +- bitmap_clear_bit (curr_reg_live, regno); +- curr_reg_pressure[cover_class]--; ++ if (!live || bitmap_clear_bit (live, regno)) ++ pressure[cover_class]--; + } + } + } +@@ -657,8 +669,10 @@ + curr_reg_pressure[ira_reg_class_cover[i]] = 0; + bitmap_clear (curr_reg_live); + EXECUTE_IF_SET_IN_BITMAP (live, 0, j, bi) +- if (current_nr_blocks == 1 || bitmap_bit_p (region_ref_regs, j)) +- mark_regno_birth_or_death (j, true); ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ || current_nr_blocks == 1 ++ || bitmap_bit_p (region_ref_regs, j)) ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, j, true); + } ---- a/src/gcc/internal-fn.c -+++ b/src/gcc/internal-fn.c -@@ -0,0 +1,147 @@ -+/* Internal functions. -+ Copyright (C) 2011 Free Software Foundation, Inc. 
+ /* Mark registers in X as mentioned in the current region. */ +@@ -712,7 +726,8 @@ + if (regno == INVALID_REGNUM) + break; + if (! bitmap_bit_p (df_get_live_in (bb), regno)) +- mark_regno_birth_or_death (regno, true); ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ regno, true); + } + #endif + } +@@ -956,19 +971,19 @@ + return true; + } + +-/* Compute the number of nondebug forward deps of an insn. */ ++/* Compute the number of nondebug deps in list LIST for INSN. */ + + static int +-dep_list_size (rtx insn) ++dep_list_size (rtx insn, sd_list_types_def list) + { + sd_iterator_def sd_it; + dep_t dep; + int dbgcount = 0, nodbgcount = 0; + + if (!MAY_HAVE_DEBUG_INSNS) +- return sd_lists_size (insn, SD_LIST_FORW); ++ return sd_lists_size (insn, list); + +- FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) ++ FOR_EACH_DEP (insn, list, sd_it, dep) + { + if (DEBUG_INSN_P (DEP_CON (dep))) + dbgcount++; +@@ -976,7 +991,7 @@ + nodbgcount++; + } + +- gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, SD_LIST_FORW)); ++ gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, list)); + + return nodbgcount; + } +@@ -995,7 +1010,7 @@ + { + int this_priority = -1; + +- if (dep_list_size (insn) == 0) ++ if (dep_list_size (insn, SD_LIST_FORW) == 0) + /* ??? We should set INSN_PRIORITY to insn_cost when and insn has + some forward deps but all of them are ignored by + contributes_to_priority hook. At the moment we set priority of +@@ -1091,6 +1106,22 @@ + qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \ + while (0) + ++/* For each cover class CL, set DEATH[CL] to the number of registers ++ in that class that die in INSN. */ + -+This file is part of GCC. 
++static void ++calculate_reg_deaths (rtx insn, int *death) ++{ ++ int i; ++ struct reg_use_data *use; + -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. ++ for (i = 0; i < ira_reg_class_cover_size; i++) ++ death[ira_reg_class_cover[i]] = 0; ++ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) ++ if (dying_use_p (use)) ++ mark_regno_birth_or_death (0, death, use->regno, true); ++} + -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. + /* Setup info about the current register pressure impact of scheduling + INSN at the current scheduling point. */ + static void +@@ -1102,23 +1133,12 @@ + enum reg_class cl; + struct reg_pressure_data *pressure_info; + int *max_reg_pressure; +- struct reg_use_data *use; + static int death[N_REG_CLASSES]; + + gcc_checking_assert (!DEBUG_INSN_P (insn)); + + excess_cost_change = 0; +- for (i = 0; i < ira_reg_class_cover_size; i++) +- death[ira_reg_class_cover[i]] = 0; +- for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) +- if (dying_use_p (use)) +- { +- cl = sched_regno_cover_class[use->regno]; +- if (use->regno < FIRST_PSEUDO_REGISTER) +- death[cl]++; +- else +- death[cl] += ira_reg_class_nregs[cl][PSEUDO_REGNO_MODE (use->regno)]; +- } ++ calculate_reg_deaths (insn, death); + pressure_info = INSN_REG_PRESSURE (insn); + max_reg_pressure = INSN_MAX_REG_PRESSURE (insn); + gcc_assert (pressure_info != NULL && max_reg_pressure != NULL); +@@ -1139,7 +1159,765 @@ + } + INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insn) = excess_cost_change; + } ++ ++/* This is the first page of code related to SCHED_PRESSURE_MODEL. 
++ It tries to make the scheduler take register pressure into account ++ without introducing too many unnecessary stalls. It hooks into the ++ main scheduling algorithm at several points: ++ ++ - Before scheduling starts, model_start_schedule constructs a ++ "model schedule" for the current block. This model schedule is ++ chosen solely to keep register pressure down. It does not take the ++ target's pipeline or the original instruction order into account, ++ except as a tie-breaker. It also doesn't work to a particular ++ pressure limit. ++ ++ This model schedule gives us an idea of what pressure can be ++ achieved for the block gives us an example of a schedule that ++ keeps to that pressure. It also makes the final schedule less ++ dependent on the original instruction order. This is important ++ because the original order can either be "wide" (many values live ++ at once, such as in user-scheduled code) or "narrow" (few values ++ live at once, such as after loop unrolling, where several ++ iterations are executed sequentially). ++ ++ We do not apply this model schedule to the rtx stream. We simply ++ record it in model_schedule. We also compute the maximum pressure, ++ MP, that was seen during this schedule. ++ ++ - Instructions are added to the ready queue even if they require ++ a stall. The length of the stall is instead computed as: ++ ++ MAX (INSN_TICK (INSN) - clock_var, 0) ++ ++ (= insn_delay). This allows rank_for_schedule to choose between ++ introducing a deliberate stall or increasing pressure. ++ ++ - Before sorting the ready queue, model_set_excess_costs assigns ++ a pressure-based cost to each ready instruction in the queue. ++ This is the instruction's INSN_REG_PRESSURE_EXCESS_COST_CHANGE ++ (ECC for short) and is effectively measured in cycles. 
++ ++ - rank_for_schedule ranks instructions based on: ++ ++ ECC (insn) + insn_delay (insn) ++ ++ then as: ++ ++ insn_delay (insn) ++ ++ So, for example, an instruction X1 with an ECC of 1 that can issue ++ now will win over an instruction X0 with an ECC of zero that would ++ introduce a stall of one cycle. However, an instruction X2 with an ++ ECC of 2 that can issue now will lose to X0. ++ ++ - When an instruction is scheduled, model_recompute updates the model ++ schedule with the new pressures (some of which might now exceed the ++ original maximum pressure MP). model_update_limit_points then searches ++ for the new point of maximum pressure, if not already known. */ ++ ++/* Used to separate high-verbosity debug information for SCHED_PRESSURE_MODEL ++ from surrounding debug information. */ ++#define MODEL_BAR \ ++ ";;\t\t+------------------------------------------------------\n" ++ ++/* Information about the pressure on a particular register class at a ++ particular point of the model schedule. */ ++struct model_pressure_data { ++ /* The pressure at this point of the model schedule, or -1 if the ++ point is associated with an instruction that has already been ++ scheduled. */ ++ int ref_pressure; + -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ ++ /* The maximum pressure during or after this point of the model schedule. */ ++ int max_pressure; ++}; + -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "gimple.h" -+#include "tree.h" -+#include "expr.h" -+#include "optabs.h" -+#include "recog.h" ++/* Per-instruction information that is used while building the model ++ schedule. Here, "schedule" refers to the model schedule rather ++ than the main schedule. */ ++struct model_insn_info { ++ /* The instruction itself. */ ++ rtx insn; + -+/* The names of each internal function, indexed by function number. 
*/ -+const char *const internal_fn_name_array[] = { -+#define DEF_INTERNAL_FN(CODE, FLAGS) #CODE, -+#include "internal-fn.def" -+#undef DEF_INTERNAL_FN -+ "" ++ /* If this instruction is in model_worklist, these fields link to the ++ previous (higher-priority) and next (lower-priority) instructions ++ in the list. */ ++ struct model_insn_info *prev; ++ struct model_insn_info *next; ++ ++ /* While constructing the schedule, QUEUE_INDEX describes whether an ++ instruction has already been added to the schedule (QUEUE_SCHEDULED), ++ is in model_worklist (QUEUE_READY), or neither (QUEUE_NOWHERE). ++ old_queue records the value that QUEUE_INDEX had before scheduling ++ started, so that we can restore it once the schedule is complete. */ ++ int old_queue; ++ ++ /* The relative importance of an unscheduled instruction. Higher ++ values indicate greater importance. */ ++ unsigned int model_priority; ++ ++ /* The length of the longest path of satisfied true dependencies ++ that leads to this instruction. */ ++ unsigned int depth; ++ ++ /* The length of the longest path of dependencies of any kind ++ that leads from this instruction. */ ++ unsigned int alap; ++ ++ /* The number of predecessor nodes that must still be scheduled. */ ++ int unscheduled_preds; +}; + -+/* The ECF_* flags of each internal function, indexed by function number. */ -+const int internal_fn_flags_array[] = { -+#define DEF_INTERNAL_FN(CODE, FLAGS) FLAGS, -+#include "internal-fn.def" -+#undef DEF_INTERNAL_FN -+ 0 ++/* Information about the pressure limit for a particular register class. ++ This structure is used when applying a model schedule to the main ++ schedule. */ ++struct model_pressure_limit { ++ /* The maximum register pressure seen in the original model schedule. */ ++ int orig_pressure; ++ ++ /* The maximum register pressure seen in the current model schedule ++ (which excludes instructions that have already been scheduled). 
*/ ++ int pressure; ++ ++ /* The point of the current model schedule at which PRESSURE is first ++ reached. It is set to -1 if the value needs to be recomputed. */ ++ int point; +}; + -+/* ARRAY_TYPE is an array of vector modes. Return the associated insn -+ for load-lanes-style optab OPTAB. The insn must exist. */ ++/* Describes a particular way of measuring register pressure. */ ++struct model_pressure_group { ++ /* Index CCI describes the maximum pressure on ira_reg_class_cover[CCI]. */ ++ struct model_pressure_limit limits[N_REG_CLASSES]; ++ ++ /* Index (POINT * ira_num_pressure_classes + CCI) describes the pressure ++ on register class ira_reg_class_cover[CCI] at point POINT of the ++ current model schedule. A POINT of model_num_insns describes the ++ pressure at the end of the schedule. */ ++ struct model_pressure_data *model; ++}; + -+static enum insn_code -+get_multi_vector_move (tree array_type, convert_optab optab) -+{ -+ enum insn_code icode; -+ enum machine_mode imode; -+ enum machine_mode vmode; ++/* Index POINT gives the instruction at point POINT of the model schedule. ++ This array doesn't change during main scheduling. */ ++static VEC (rtx, heap) *model_schedule; + -+ gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE); -+ imode = TYPE_MODE (array_type); -+ vmode = TYPE_MODE (TREE_TYPE (array_type)); ++/* The list of instructions in the model worklist, sorted in order of ++ decreasing priority. */ ++static struct model_insn_info *model_worklist; + -+ icode = convert_optab_handler (optab, imode, vmode); -+ gcc_assert (icode != CODE_FOR_nothing); -+ return icode; -+} ++/* Index I describes the instruction with INSN_LUID I. */ ++static struct model_insn_info *model_insns; + -+/* Expand LOAD_LANES call STMT. */ ++/* The number of instructions in the model schedule. 
*/ ++static int model_num_insns; + -+static void -+expand_LOAD_LANES (gimple stmt) -+{ -+ tree type, lhs, rhs; -+ rtx target, mem; -+ enum insn_code icode; -+ const struct insn_operand_data *operand; ++/* The index of the first instruction in model_schedule that hasn't yet been ++ added to the main schedule, or model_num_insns if all of them have. */ ++static int model_curr_point; + -+ lhs = gimple_call_lhs (stmt); -+ rhs = gimple_call_arg (stmt, 0); -+ type = TREE_TYPE (lhs); ++/* Describes the pressure before each instruction in the model schedule. */ ++static struct model_pressure_group model_before_pressure; + -+ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); -+ mem = expand_normal (rhs); ++/* The first unused model_priority value (as used in model_insn_info). */ ++static unsigned int model_next_priority; + -+ gcc_assert (REG_P (target)); -+ gcc_assert (MEM_P (mem)); -+ PUT_MODE (mem, TYPE_MODE (type)); + -+ icode = get_multi_vector_move (type, vec_load_lanes_optab); ++/* The model_pressure_data for ira_reg_class_cover[CCI] in GROUP ++ at point POINT of the model schedule. */ ++#define MODEL_PRESSURE_DATA(GROUP, POINT, CCI) \ ++ (&(GROUP)->model[(POINT) * ira_reg_class_cover_size + (CCI)]) + -+ operand = &insn_data[(int) icode].operand[1]; -+ if (operand->predicate && !operand->predicate (mem, operand->mode)) -+ mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0))); ++/* The maximum pressure on ira_reg_class_cover[CCI] in GROUP at or ++ after point POINT of the model schedule. */ ++#define MODEL_MAX_PRESSURE(GROUP, POINT, CCI) \ ++ (MODEL_PRESSURE_DATA (GROUP, POINT, CCI)->max_pressure) + -+ emit_insn (GEN_FCN (icode) (target, mem)); ++/* The pressure on ira_reg_class_cover[CCI] in GROUP at point POINT ++ of the model schedule. */ ++#define MODEL_REF_PRESSURE(GROUP, POINT, CCI) \ ++ (MODEL_PRESSURE_DATA (GROUP, POINT, CCI)->ref_pressure) ++ ++/* Information about INSN that is used when creating the model schedule. 
*/ ++#define MODEL_INSN_INFO(INSN) \ ++ (&model_insns[INSN_LUID (INSN)]) ++ ++/* The instruction at point POINT of the model schedule. */ ++#define MODEL_INSN(POINT) \ ++ (VEC_index (rtx, model_schedule, POINT)) ++ ++ ++/* Return INSN's index in the model schedule, or model_num_insns if it ++ doesn't belong to that schedule. */ ++ ++static int ++model_index (rtx insn) ++{ ++ if (INSN_MODEL_INDEX (insn) == 0) ++ return model_num_insns; ++ return INSN_MODEL_INDEX (insn) - 1; +} + -+/* Expand STORE_LANES call STMT. */ ++/* Make sure that GROUP->limits is up-to-date for the current point ++ of the model schedule. */ + +static void -+expand_STORE_LANES (gimple stmt) ++model_update_limit_points_in_group (struct model_pressure_group *group) +{ -+ tree type, lhs, rhs; -+ rtx target, reg; -+ enum insn_code icode; -+ const struct insn_operand_data *operand; ++ int cci, max_pressure, point; + -+ lhs = gimple_call_lhs (stmt); -+ rhs = gimple_call_arg (stmt, 0); -+ type = TREE_TYPE (rhs); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ /* We may have passed the final point at which the pressure in ++ group->limits[cci].pressure was reached. Update the limit if so. */ ++ max_pressure = MODEL_MAX_PRESSURE (group, model_curr_point, cci); ++ group->limits[cci].pressure = max_pressure; + -+ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); -+ reg = expand_normal (rhs); ++ /* Find the point at which MAX_PRESSURE is first reached. We need ++ to search in three cases: + -+ gcc_assert (MEM_P (target)); -+ PUT_MODE (target, TYPE_MODE (type)); ++ - We've already moved past the previous pressure point. ++ In this case we search forward from model_curr_point. + -+ icode = get_multi_vector_move (type, vec_store_lanes_optab); ++ - We scheduled the previous point of maximum pressure ahead of ++ its position in the model schedule, but doing so didn't bring ++ the pressure point earlier. In this case we search forward ++ from that previous pressure point. 
+ -+ operand = &insn_data[(int) icode].operand[0]; -+ if (operand->predicate && !operand->predicate (target, operand->mode)) -+ target = replace_equiv_address (target, -+ force_reg (Pmode, XEXP (target, 0))); ++ - Scheduling an instruction early caused the maximum pressure ++ to decrease. In this case we will have set the pressure ++ point to -1, and we search forward from model_curr_point. */ ++ point = MAX (group->limits[cci].point, model_curr_point); ++ while (point < model_num_insns ++ && MODEL_REF_PRESSURE (group, point, cci) < max_pressure) ++ point++; ++ group->limits[cci].point = point; + -+ operand = &insn_data[(int) icode].operand[1]; -+ if (operand->predicate && !operand->predicate (reg, operand->mode)) -+ reg = force_reg (TYPE_MODE (type), reg); ++ gcc_assert (MODEL_REF_PRESSURE (group, point, cci) == max_pressure); ++ gcc_assert (MODEL_MAX_PRESSURE (group, point, cci) == max_pressure); ++ } ++} + -+ emit_insn (GEN_FCN (icode) (target, reg)); ++/* Make sure that all register-pressure limits are up-to-date for the ++ current position in the model schedule. */ ++ ++static void ++model_update_limit_points (void) ++{ ++ model_update_limit_points_in_group (&model_before_pressure); +} + -+/* Routines to expand each internal function, indexed by function number. -+ Each routine has the prototype: ++/* Return the model_index of the last unscheduled use in chain USE ++ outside of USE's instruction. Return -1 if there are no other uses, ++ or model_num_insns if the register is live at the end of the block. */ + -+ expand_ (gimple stmt) ++static int ++model_last_use_except (struct reg_use_data *use) ++{ ++ struct reg_use_data *next; ++ int last, index; + -+ where STMT is the statement that performs the call. 
*/ -+static void (*const internal_fn_expanders[]) (gimple) = { -+#define DEF_INTERNAL_FN(CODE, FLAGS) expand_##CODE, -+#include "internal-fn.def" -+#undef DEF_INTERNAL_FN -+ 0 -+}; ++ last = -1; ++ for (next = use->next_regno_use; next != use; next = next->next_regno_use) ++ if (NONDEBUG_INSN_P (next->insn) ++ && QUEUE_INDEX (next->insn) != QUEUE_SCHEDULED) ++ { ++ index = model_index (next->insn); ++ if (index == model_num_insns) ++ return model_num_insns; ++ if (last < index) ++ last = index; ++ } ++ return last; ++} + -+/* Expand STMT, which is a call to internal function FN. */ ++/* An instruction with model_index POINT has just been scheduled, and it ++ adds DELTA to the pressure on ira_reg_class_cover[CCI] after POINT - 1. ++ Update MODEL_REF_PRESSURE (GROUP, POINT, CCI) and ++ MODEL_MAX_PRESSURE (GROUP, POINT, CCI) accordingly. */ + -+void -+expand_internal_call (gimple stmt) ++static void ++model_start_update_pressure (struct model_pressure_group *group, ++ int point, int cci, int delta) +{ -+ internal_fn_expanders[(int) gimple_call_internal_fn (stmt)] (stmt); -+} ---- a/src/gcc/internal-fn.def -+++ b/src/gcc/internal-fn.def -@@ -0,0 +1,42 @@ -+/* Internal functions. -+ Copyright (C) 2011 Free Software Foundation, Inc. ++ int next_max_pressure; + -+This file is part of GCC. ++ if (point == model_num_insns) ++ { ++ /* The instruction wasn't part of the model schedule; it was moved ++ from a different block. Update the pressure for the end of ++ the model schedule. */ ++ MODEL_REF_PRESSURE (group, point, cci) += delta; ++ MODEL_MAX_PRESSURE (group, point, cci) += delta; ++ } ++ else ++ { ++ /* Record that this instruction has been scheduled. Nothing now ++ changes between POINT and POINT + 1, so get the maximum pressure ++ from the latter. If the maximum pressure decreases, the new ++ pressure point may be before POINT. 
*/ ++ MODEL_REF_PRESSURE (group, point, cci) = -1; ++ next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, cci); ++ if (MODEL_MAX_PRESSURE (group, point, cci) > next_max_pressure) ++ { ++ MODEL_MAX_PRESSURE (group, point, cci) = next_max_pressure; ++ if (group->limits[cci].point == point) ++ group->limits[cci].point = -1; ++ } ++ } ++} + -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. ++/* Record that scheduling a later instruction has changed the pressure ++ at point POINT of the model schedule by DELTA (which might be 0). ++ Update GROUP accordingly. Return nonzero if these changes might ++ trigger changes to previous points as well. */ + -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. ++static int ++model_update_pressure (struct model_pressure_group *group, ++ int point, int cci, int delta) ++{ ++ int ref_pressure, max_pressure, next_max_pressure; + -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ ++ /* If POINT hasn't yet been scheduled, update its pressure. */ ++ ref_pressure = MODEL_REF_PRESSURE (group, point, cci); ++ if (ref_pressure >= 0 && delta != 0) ++ { ++ ref_pressure += delta; ++ MODEL_REF_PRESSURE (group, point, cci) = ref_pressure; ++ ++ /* Check whether the maximum pressure in the overall schedule ++ has increased. (This means that the MODEL_MAX_PRESSURE of ++ every point <= POINT will need to increae too; see below.) 
*/ ++ if (group->limits[cci].pressure < ref_pressure) ++ group->limits[cci].pressure = ref_pressure; ++ ++ /* If we are at maximum pressure, and the maximum pressure ++ point was previously unknown or later than POINT, ++ bring it forward. */ ++ if (group->limits[cci].pressure == ref_pressure ++ && !IN_RANGE (group->limits[cci].point, 0, point)) ++ group->limits[cci].point = point; ++ ++ /* If POINT used to be the point of maximum pressure, but isn't ++ any longer, we need to recalculate it using a forward walk. */ ++ if (group->limits[cci].pressure > ref_pressure ++ && group->limits[cci].point == point) ++ group->limits[cci].point = -1; ++ } ++ ++ /* Update the maximum pressure at POINT. Changes here might also ++ affect the maximum pressure at POINT - 1. */ ++ next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, cci); ++ max_pressure = MAX (ref_pressure, next_max_pressure); ++ if (MODEL_MAX_PRESSURE (group, point, cci) != max_pressure) ++ { ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ return 1; ++ } ++ return 0; ++} + -+/* This file specifies a list of internal "functions". These functions -+ differ from built-in functions in that they have no linkage and cannot -+ be called directly by the user. They represent operations that are only -+ synthesised by GCC itself. ++/* INSN has just been scheduled. Update the model schedule accordingly. */ + -+ Internal functions are used instead of tree codes if the operation -+ and its operands are more naturally represented as a GIMPLE_CALL -+ than a GIMPLE_ASSIGN. 
++static void ++model_recompute (rtx insn) ++{ ++ struct { ++ int last_use; ++ int regno; ++ } uses[FIRST_PSEUDO_REGISTER + MAX_RECOG_OPERANDS]; ++ struct reg_use_data *use; ++ struct reg_pressure_data *reg_pressure; ++ int delta[N_REG_CLASSES]; ++ int cci, point, mix, new_last, cl, ref_pressure, queue; ++ unsigned int i, num_uses, num_pending_births; ++ bool print_p; ++ ++ /* The destinations of INSN were previously live from POINT onwards, but are ++ now live from model_curr_point onwards. Set up DELTA accordingly. */ ++ point = model_index (insn); ++ reg_pressure = INSN_REG_PRESSURE (insn); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta[cl] = reg_pressure[cci].set_increase; ++ } ++ ++ /* Record which registers previously died at POINT, but which now die ++ before POINT. Adjust DELTA so that it represents the effect of ++ this change after POINT - 1. Set NUM_PENDING_BIRTHS to the number of ++ registers that will be born in the range [model_curr_point, POINT). */ ++ num_uses = 0; ++ num_pending_births = 0; ++ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) ++ { ++ new_last = model_last_use_except (use); ++ if (new_last < point) ++ { ++ gcc_assert (num_uses < ARRAY_SIZE (uses)); ++ uses[num_uses].last_use = new_last; ++ uses[num_uses].regno = use->regno; ++ /* This register is no longer live after POINT - 1. */ ++ mark_regno_birth_or_death (NULL, delta, use->regno, false); ++ num_uses++; ++ if (new_last >= 0) ++ num_pending_births++; ++ } ++ } + -+ Each entry in this file has the form: ++ /* Update the MODEL_REF_PRESSURE and MODEL_MAX_PRESSURE for POINT. ++ Also set each group pressure limit for POINT. */ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ model_start_update_pressure (&model_before_pressure, ++ point, cci, delta[cl]); ++ } ++ ++ /* Walk the model schedule backwards, starting immediately before POINT. 
*/ ++ print_p = false; ++ if (point != model_curr_point) ++ do ++ { ++ point--; ++ insn = MODEL_INSN (point); ++ queue = QUEUE_INDEX (insn); + ++ if (queue != QUEUE_SCHEDULED) ++ { ++ /* DELTA describes the effect of the move on the register pressure ++ after POINT. Make it describe the effect on the pressure ++ before POINT. */ ++ i = 0; ++ while (i < num_uses) ++ { ++ if (uses[i].last_use == point) ++ { ++ /* This register is now live again. */ ++ mark_regno_birth_or_death (NULL, delta, ++ uses[i].regno, true); ++ ++ /* Remove this use from the array. */ ++ uses[i] = uses[num_uses - 1]; ++ num_uses--; ++ num_pending_births--; ++ } ++ else ++ i++; ++ } + -+ DEF_INTERNAL_FN (NAME, FLAGS) ++ if (sched_verbose >= 5) ++ { ++ char buf[2048]; + -+ where NAME is the name of the function and FLAGS is a set of -+ ECF_* flags. Each entry must have a corresponding expander -+ of the form: ++ if (!print_p) ++ { ++ fprintf (sched_dump, MODEL_BAR); ++ fprintf (sched_dump, ";;\t\t| New pressure for model" ++ " schedule\n"); ++ fprintf (sched_dump, MODEL_BAR); ++ print_p = true; ++ } + -+ void expand_NAME (gimple stmt) ++ print_pattern (buf, PATTERN (insn), 0); ++ fprintf (sched_dump, ";;\t\t| %3d %4d %-30s ", ++ point, INSN_UID (insn), buf); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ ref_pressure = MODEL_REF_PRESSURE (&model_before_pressure, ++ point, cci); ++ fprintf (sched_dump, " %s:[%d->%d]", ++ reg_class_names[ira_reg_class_cover[cci]], ++ ref_pressure, ref_pressure + delta[cl]); ++ } ++ fprintf (sched_dump, "\n"); ++ } ++ } + -+ where STMT is the statement that performs the call. */ ++ /* Adjust the pressure at POINT. Set MIX to nonzero if POINT - 1 ++ might have changed as well. 
*/ ++ mix = num_pending_births; ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ mix |= delta[cl]; ++ mix |= model_update_pressure (&model_before_pressure, ++ point, cci, delta[cl]); ++ } ++ } ++ while (mix && point > model_curr_point); + -+DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF) -+DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF) ---- a/src/gcc/internal-fn.h -+++ b/src/gcc/internal-fn.h -@@ -0,0 +1,52 @@ -+/* Internal functions. -+ Copyright (C) 2011 Free Software Foundation, Inc. ++ if (print_p) ++ fprintf (sched_dump, MODEL_BAR); ++} ++ ++/* model_spill_cost (CL, P, P') returns the cost of increasing the ++ pressure on CL from P to P'. We use this to calculate a "base ECC", ++ baseECC (CL, X), for each cover class CL and each instruction X. ++ Supposing X changes the pressure on CL from P to P', and that the ++ maximum pressure on CL in the current model schedule is MP', then: ++ ++ * if X occurs before or at the next point of maximum pressure in ++ the model schedule and P' > MP', then: ++ ++ baseECC (CL, X) = model_spill_cost (CL, MP, P') ++ ++ The idea is that the pressure after scheduling a fixed set of ++ instructions -- in this case, the set up to and including the ++ next maximum pressure point -- is going to be the same regardless ++ of the order; we simply want to keep the intermediate pressure ++ under control. Thus X has a cost of zero unless scheduling it ++ now would exceed MP'. ++ ++ If all increases in the set are by the same amount, no zero-cost ++ instruction will ever cause the pressure to exceed MP'. However, ++ if X is instead moved past an instruction X' with pressure in the ++ range (MP' - (P' - P), MP'), the pressure at X' will increase ++ beyond MP'. Since baseECC is very much a heuristic anyway, ++ it doesn't seem worth the overhead of tracking cases like these. ++ ++ The cost of exceeding MP' is always based on the original maximum ++ pressure MP. 
This is so that going 2 registers over the original ++ limit has the same cost regardless of whether it comes from two ++ separate +1 deltas or from a single +2 delta. ++ ++ * if X occurs after the next point of maximum pressure in the model ++ schedule and P' > P, then: ++ ++ baseECC (CL, X) = model_spill_cost (CL, MP, MP' + (P' - P)) ++ ++ That is, if we move X forward across a point of maximum pressure, ++ and if X increases the pressure by P' - P, then we conservatively ++ assume that scheduling X next would increase the maximum pressure ++ by P' - P. Again, the cost of doing this is based on the original ++ maximum pressure MP, for the same reason as above. ++ ++ * if P' < P, P > MP, and X occurs at or after the next point of ++ maximum pressure, then: ++ ++ baseECC (CL, X) = -model_spill_cost (CL, MAX (MP, P'), P) ++ ++ That is, if we have already exceeded the original maximum pressure MP, ++ and if X might reduce the maximum pressure again -- or at least push ++ it further back, and thus allow more scheduling freedom -- it is given ++ a negative cost to reflect the improvement. ++ ++ * otherwise, ++ ++ baseECC (CL, X) = 0 ++ ++ In this case, X is not expected to affect the maximum pressure MP', ++ so it has zero cost. ++ ++ We then create a combined value baseECC (X) that is the sum of ++ baseECC (CL, X) for each cover class CL. ++ ++ baseECC (X) could itself be used as the ECC value described above. ++ However, this is often too conservative, in the sense that it ++ tends to make high-priority instructions that increase pressure ++ wait too long in cases where introducing a spill would be better. ++ For this reason the final ECC is a priority-adjusted form of ++ baseECC (X). 
Specifically, we calculate: ++ ++ P (X) = INSN_PRIORITY (X) - insn_delay (X) - baseECC (X) ++ baseP = MAX { P (X) | baseECC (X) <= 0 } ++ ++ Then: ++ ++ ECC (X) = MAX (MIN (baseP - P (X), baseECC (X)), 0) ++ ++ Thus an instruction's effect on pressure is ignored if it has a high ++ enough priority relative to the ones that don't increase pressure. ++ Negative values of baseECC (X) do not increase the priority of X ++ itself, but they do make it harder for other instructions to ++ increase the pressure further. ++ ++ This pressure cost is deliberately timid. The intention has been ++ to choose a heuristic that rarely interferes with the normal list ++ scheduler in cases where that scheduler would produce good code. ++ We simply want to curb some of its worst excesses. */ ++ ++/* Return the cost of increasing the pressure in class CL from FROM to TO. ++ ++ Here we use the very simplistic cost model that every register above ++ ira_available_class_regs[CL] has a spill cost of 1. We could use other ++ measures instead, such as one based on MEMORY_MOVE_COST. However: ++ ++ (1) In order for an instruction to be scheduled, the higher cost ++ would need to be justified in a single saving of that many stalls. ++ This is overly pessimistic, because the benefit of spilling is ++ often to avoid a sequence of several short stalls rather than ++ a single long one. ++ ++ (2) The cost is still arbitrary. Because we are not allocating ++ registers during scheduling, we have no way of knowing for ++ sure how many memory accesses will be required by each spill, ++ where the spills will be placed within the block, or even ++ which block(s) will contain the spills. ++ ++ So a higher cost than 1 is often too conservative in practice, ++ forcing blocks to contain unnecessary stalls instead of spill code. ++ The simple cost below seems to be the best compromise. It reduces ++ the interference with the normal list scheduler, which helps make ++ it more suitable for a default-on option. 
*/ + -+This file is part of GCC. ++static int ++model_spill_cost (int cl, int from, int to) ++{ ++ from = MAX (from, ira_available_class_regs[cl]); ++ return MAX (to, from) - from; ++} + -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. ++/* Return baseECC (ira_reg_class_cover[CCI], POINT), given that ++ P = curr_reg_pressure[ira_reg_class_cover[CCI]] and that ++ P' = P + DELTA. */ + -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. ++static int ++model_excess_group_cost (struct model_pressure_group *group, ++ int point, int cci, int delta) ++{ ++ int pressure, cl; + -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ ++ cl = ira_reg_class_cover[cci]; ++ if (delta < 0 && point >= group->limits[cci].point) ++ { ++ pressure = MAX (group->limits[cci].orig_pressure, ++ curr_reg_pressure[cl] + delta); ++ return -model_spill_cost (cl, pressure, curr_reg_pressure[cl]); ++ } + -+#ifndef GCC_INTERNAL_FN_H -+#define GCC_INTERNAL_FN_H ++ if (delta > 0) ++ { ++ if (point > group->limits[cci].point) ++ pressure = group->limits[cci].pressure + delta; ++ else ++ pressure = curr_reg_pressure[cl] + delta; + -+enum internal_fn { -+#define DEF_INTERNAL_FN(CODE, FLAGS) IFN_##CODE, -+#include "internal-fn.def" -+#undef DEF_INTERNAL_FN -+ IFN_LAST -+}; ++ if (pressure > group->limits[cci].pressure) ++ return model_spill_cost (cl, group->limits[cci].orig_pressure, ++ pressure); ++ } + -+extern const char *const internal_fn_name_array[]; -+extern const int internal_fn_flags_array[]; ++ return 0; ++} + -+/* Return the name of internal function FN. 
The name is only meaningful -+ for dumps; it has no linkage. */ ++/* Return baseECC (MODEL_INSN (INSN)). Dump the costs to sched_dump ++ if PRINT_P. */ + -+static inline const char * -+internal_fn_name (enum internal_fn fn) ++static int ++model_excess_cost (rtx insn, bool print_p) +{ -+ return internal_fn_name_array[(int) fn]; ++ int point, cci, cl, cost, this_cost, delta; ++ struct reg_pressure_data *insn_reg_pressure; ++ int insn_death[N_REG_CLASSES]; ++ ++ calculate_reg_deaths (insn, insn_death); ++ point = model_index (insn); ++ insn_reg_pressure = INSN_REG_PRESSURE (insn); ++ cost = 0; ++ ++ if (print_p) ++ fprintf (sched_dump, ";;\t\t| %3d %4d | %4d %+3d |", point, ++ INSN_UID (insn), INSN_PRIORITY (insn), insn_delay (insn)); ++ ++ /* Sum up the individual costs for each register class. */ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta = insn_reg_pressure[cci].set_increase - insn_death[cl]; ++ this_cost = model_excess_group_cost (&model_before_pressure, ++ point, cci, delta); ++ cost += this_cost; ++ if (print_p) ++ fprintf (sched_dump, " %s:[%d base cost %d]", ++ reg_class_names[cl], delta, this_cost); ++ } ++ ++ if (print_p) ++ fprintf (sched_dump, "\n"); ++ ++ return cost; +} + -+/* Return the ECF_* flags for function FN. */ ++/* Dump the next points of maximum pressure for GROUP. 
*/ + -+static inline int -+internal_fn_flags (enum internal_fn fn) ++static void ++model_dump_pressure_points (struct model_pressure_group *group) +{ -+ return internal_fn_flags_array[(int) fn]; ++ int cci, cl; ++ ++ fprintf (sched_dump, ";;\t\t| pressure points"); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ fprintf (sched_dump, " %s:[%d->%d at ", reg_class_names[cl], ++ curr_reg_pressure[cl], group->limits[cci].pressure); ++ if (group->limits[cci].point < model_num_insns) ++ fprintf (sched_dump, "%d:%d]", group->limits[cci].point, ++ INSN_UID (MODEL_INSN (group->limits[cci].point))); ++ else ++ fprintf (sched_dump, "end]"); ++ } ++ fprintf (sched_dump, "\n"); +} + -+extern void expand_internal_call (gimple); ++/* Set INSN_REG_PRESSURE_EXCESS_COST_CHANGE for INSNS[0...COUNT-1]. */ + -+#endif ---- a/src/gcc/ipa-prop.c -+++ b/src/gcc/ipa-prop.c -@@ -1418,6 +1418,8 @@ - { - tree target = gimple_call_fn (call); - -+ if (!target) -+ return; - if (TREE_CODE (target) == SSA_NAME) - ipa_analyze_indirect_call_uses (node, info, parms_info, call, target); - else if (TREE_CODE (target) == OBJ_TYPE_REF) ---- a/src/gcc/loop-doloop.c -+++ b/src/gcc/loop-doloop.c -@@ -78,6 +78,8 @@ - rtx inc_src; - rtx condition; - rtx pattern; -+ rtx cc_reg = NULL_RTX; -+ rtx reg_orig = NULL_RTX; - - /* The canonical doloop pattern we expect has one of the following - forms: -@@ -96,7 +98,16 @@ - 2) (set (reg) (plus (reg) (const_int -1)) - (set (pc) (if_then_else (reg != 0) - (label_ref (label)) -- (pc))). */ -+ (pc))). ++static void ++model_set_excess_costs (rtx *insns, int count) ++{ ++ int i, cost, priority_base, priority; ++ bool print_p; + -+ Some targets (ARM) do the comparison before the branch, as in the -+ following form: ++ /* Record the baseECC value for each instruction in the model schedule, ++ except that negative costs are converted to zero ones now rather thatn ++ later. 
Do not assign a cost to debug instructions, since they must ++ not change code-generation decisions. Experiments suggest we also ++ get better results by not assigning a cost to instructions from ++ a different block. ++ ++ Set PRIORITY_BASE to baseP in the block comment above. This is the ++ maximum priority of the "cheap" instructions, which should always ++ include the next model instruction. */ ++ priority_base = 0; ++ print_p = false; ++ for (i = 0; i < count; i++) ++ if (INSN_MODEL_INDEX (insns[i])) ++ { ++ if (sched_verbose >= 6 && !print_p) ++ { ++ fprintf (sched_dump, MODEL_BAR); ++ fprintf (sched_dump, ";;\t\t| Pressure costs for ready queue\n"); ++ model_dump_pressure_points (&model_before_pressure); ++ fprintf (sched_dump, MODEL_BAR); ++ print_p = true; ++ } ++ cost = model_excess_cost (insns[i], print_p); ++ if (cost <= 0) ++ { ++ priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]) - cost; ++ priority_base = MAX (priority_base, priority); ++ cost = 0; ++ } ++ INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = cost; ++ } ++ if (print_p) ++ fprintf (sched_dump, MODEL_BAR); + -+ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) -+ (set (reg) (plus (reg) (const_int -1)))]) -+ (set (pc) (if_then_else (cc == NE) -+ (label_ref (label)) -+ (pc))) */ - - pattern = PATTERN (doloop_pat); ++ /* Use MAX (baseECC, 0) and baseP to calculcate ECC for each ++ instruction. */ ++ for (i = 0; i < count; i++) ++ { ++ cost = INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]); ++ priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]); ++ if (cost > 0 && priority > priority_base) ++ { ++ cost += priority_base - priority; ++ INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = MAX (cost, 0); ++ } ++ } ++} ++ + /* Returns a positive value if x is preferred; returns a negative value if + y is preferred. Should never return 0, since that will make the sort + unstable. */ +@@ -1170,23 +1948,20 @@ + /* Make sure that priority of TMP and TMP2 are initialized. 
*/ + gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2)); -@@ -104,19 +115,47 @@ +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) { - rtx cond; - rtx prev_insn = prev_nondebug_insn (doloop_pat); -+ rtx cmp_arg1, cmp_arg2; -+ rtx cmp_orig; - -- /* We expect the decrement to immediately precede the branch. */ -+ /* In case the pattern is not PARALLEL we expect two forms -+ of doloop which are cases 2) and 3) above: in case 2) the -+ decrement immediately precedes the branch, while in case 3) -+ the compare and decrement instructions immediately precede -+ the branch. */ + int diff; - if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) - return 0; + /* Prefer insn whose scheduling results in the smallest register + pressure excess. */ + if ((diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp) +- + (INSN_TICK (tmp) > clock_var +- ? INSN_TICK (tmp) - clock_var : 0) ++ + insn_delay (tmp) + - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2) +- - (INSN_TICK (tmp2) > clock_var +- ? INSN_TICK (tmp2) - clock_var : 0))) != 0) ++ - insn_delay (tmp2)))) + return diff; + } - cmp = pattern; -- inc = PATTERN (PREV_INSN (doloop_pat)); -+ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) -+ { -+ /* The third case: the compare and decrement instructions -+ immediately precede the branch. 
*/ -+ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); -+ if (GET_CODE (cmp_orig) != SET) -+ return 0; -+ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) -+ return 0; -+ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); -+ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); -+ if (cmp_arg2 != const0_rtx -+ || GET_CODE (cmp_arg1) != PLUS) -+ return 0; -+ reg_orig = XEXP (cmp_arg1, 0); -+ if (XEXP (cmp_arg1, 1) != GEN_INT (-1) -+ || !REG_P (reg_orig)) -+ return 0; -+ cc_reg = SET_DEST (cmp_orig); -+ -+ inc = XVECEXP (PATTERN (prev_insn), 0, 1); -+ } -+ else -+ inc = PATTERN (prev_insn); - /* We expect the condition to be of the form (reg != 0) */ - cond = XEXP (SET_SRC (cmp), 0); - if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) - return 0; - - } - else +- if (sched_pressure_p ++ if (sched_pressure != SCHED_PRESSURE_NONE + && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var)) { -@@ -162,11 +201,15 @@ - return 0; + if (INSN_TICK (tmp) <= clock_var) +@@ -1277,11 +2052,22 @@ + return val; + } - if ((XEXP (condition, 0) == reg) -+ /* For the third case: */ -+ || ((cc_reg != NULL_RTX) -+ && (XEXP (condition, 0) == cc_reg) -+ && (reg_orig == reg)) - || (GET_CODE (XEXP (condition, 0)) == PLUS -- && XEXP (XEXP (condition, 0), 0) == reg)) -+ && XEXP (XEXP (condition, 0), 0) == reg)) - { - if (GET_CODE (pattern) != PARALLEL) -- /* The second form we expect: -+ /* For the second form we expect: ++ /* Prefer instructions that occur earlier in the model schedule. */ ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ { ++ int diff; ++ ++ diff = model_index (tmp) - model_index (tmp2); ++ if (diff != 0) ++ return diff; ++ } ++ + /* Prefer the insn which has more later insns that depend on it. + This gives the scheduler more freedom when scheduling later + instructions at the expense of added register pressure. 
*/ - (set (reg) (plus (reg) (const_int -1)) - (set (pc) (if_then_else (reg != 0) -@@ -181,7 +224,24 @@ - (set (reg) (plus (reg) (const_int -1))) - (additional clobbers and uses)]) +- val = (dep_list_size (tmp2) - dep_list_size (tmp)); ++ val = (dep_list_size (tmp2, SD_LIST_FORW) ++ - dep_list_size (tmp, SD_LIST_FORW)); -- So we return that form instead. -+ For the third form we expect: + if (flag_sched_dep_count_heuristic && val != 0) + return val; +@@ -1480,12 +2266,15 @@ + int i; + rtx *first = ready_lastpos (ready); + +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + { + for (i = 0; i < ready->n_ready; i++) + if (!DEBUG_INSN_P (first[i])) + setup_insn_reg_pressure_info (first[i]); + } ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns) ++ model_set_excess_costs (first, ready->n_ready); + SCHED_SORT (first, ready->n_ready); + } + +@@ -1551,10 +2340,12 @@ + gcc_checking_assert (!DEBUG_INSN_P (insn)); + + for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) +- if (dying_use_p (use) && bitmap_bit_p (curr_reg_live, use->regno)) +- mark_regno_birth_or_death (use->regno, false); ++ if (dying_use_p (use)) ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ use->regno, false); + for (set = INSN_REG_SET_LIST (insn); set != NULL; set = set->next_insn_set) +- mark_regno_birth_or_death (set->regno, true); ++ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, ++ set->regno, true); + } + + /* Set up or update (if UPDATE_P) max register pressure (see its +@@ -1626,11 +2417,618 @@ + void + sched_setup_bb_reg_pressure_info (basic_block bb, rtx after) + { +- gcc_assert (sched_pressure_p); ++ gcc_assert (sched_pressure == SCHED_PRESSURE_WEIGHTED); + initiate_bb_reg_pressure_info (bb); + setup_insn_max_reg_pressure (after, false); + } ++ ++/* Return (in order): + -+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) -+ (set (reg) (plus (reg) (const_int -1)))]) 
-+ (set (pc) (if_then_else (cc == NE) -+ (label_ref (label)) -+ (pc))) ++ - positive if INSN adversely affects the pressure on one ++ register class + -+ which is equivalent to the following: ++ - negative if INSN reduces the pressure on one register class + -+ (parallel [(set (cc) (compare (reg, 1)) -+ (set (reg) (plus (reg) (const_int -1))) -+ (set (pc) (if_then_else (NE == cc) -+ (label_ref (label)) -+ (pc))))]) ++ - 0 if INSN doesn't affect the pressure on any register class. */ + -+ So we return the second form instead for the two cases. ++static int ++model_classify_pressure (struct model_insn_info *insn) ++{ ++ struct reg_pressure_data *reg_pressure; ++ int death[N_REG_CLASSES]; ++ int cci, cl, sum; ++ ++ calculate_reg_deaths (insn->insn, death); ++ reg_pressure = INSN_REG_PRESSURE (insn->insn); ++ sum = 0; ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ if (death[cl] < reg_pressure[cci].set_increase) ++ return 1; ++ sum += reg_pressure[cci].set_increase - death[cl]; ++ } ++ return sum; ++} + - */ - condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); - ---- a/src/gcc/modulo-sched.c -+++ b/src/gcc/modulo-sched.c -@@ -116,14 +116,18 @@ - - /* The number of different iterations the nodes in ps span, assuming - the stage boundaries are placed efficiently. */ --#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \ -- + 1 + (ps)->ii - 1) / (ps)->ii) -+#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \ -+ + 1 + ii - 1) / ii) -+/* The stage count of ps. */ -+#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) - - /* A single instruction in the partial schedule. */ - struct ps_insn - { -- /* The corresponding DDG_NODE. */ -- ddg_node_ptr node; -+ /* Identifies the instruction to be scheduled. Values smaller than -+ the ddg's num_nodes refer directly to ddg nodes. A value of -+ X - num_nodes refers to register move X. 
*/ -+ int id; - - /* The (absolute) cycle in which the PS instruction is scheduled. - Same as SCHED_TIME (node). */ -@@ -133,10 +137,35 @@ - ps_insn_ptr next_in_row, - prev_in_row; - -- /* The number of nodes in the same row that come after this node. */ -- int row_rest_count; - }; - -+/* Information about a register move that has been added to a partial -+ schedule. */ -+struct ps_reg_move_info ++/* Return true if INSN1 should come before INSN2 in the model schedule. */ ++ ++static int ++model_order_p (struct model_insn_info *insn1, struct model_insn_info *insn2) +{ -+ /* The source of the move is defined by the ps_insn with id DEF. -+ The destination is used by the ps_insns with the ids in USES. */ -+ int def; -+ sbitmap uses; ++ unsigned int height1, height2; ++ unsigned int priority1, priority2; + -+ /* The original form of USES' instructions used OLD_REG, but they -+ should now use NEW_REG. */ -+ rtx old_reg; -+ rtx new_reg; ++ /* Prefer instructions with a higher model priority. */ ++ if (insn1->model_priority != insn2->model_priority) ++ return insn1->model_priority > insn2->model_priority; ++ ++ /* Combine the length of the longest path of satisfied true dependencies ++ that leads to each instruction (depth) with the length of the longest ++ path of any dependencies that leads from the instruction (alap). ++ Prefer instructions with the greatest combined length. If the combined ++ lengths are equal, prefer instructions with the greatest depth. ++ ++ The idea is that, if we have a set S of "equal" instructions that each ++ have ALAP value X, and we pick one such instruction I, any true-dependent ++ successors of I that have ALAP value X - 1 should be preferred over S. ++ This encourages the schedule to be "narrow" rather than "wide". 
++ However, if I is a low-priority instruction that we decided to ++ schedule because of its model_classify_pressure, and if there ++ is a set of higher-priority instructions T, the aforementioned ++ successors of I should not have the edge over T. */ ++ height1 = insn1->depth + insn1->alap; ++ height2 = insn2->depth + insn2->alap; ++ if (height1 != height2) ++ return height1 > height2; ++ if (insn1->depth != insn2->depth) ++ return insn1->depth > insn2->depth; ++ ++ /* We have no real preference between INSN1 an INSN2 as far as attempts ++ to reduce pressure go. Prefer instructions with higher priorities. */ ++ priority1 = INSN_PRIORITY (insn1->insn); ++ priority2 = INSN_PRIORITY (insn2->insn); ++ if (priority1 != priority2) ++ return priority1 > priority2; + -+ /* The number of consecutive stages that the move occupies. */ -+ int num_consecutive_stages; ++ /* Use the original rtl sequence as a tie-breaker. */ ++ return insn1 < insn2; ++} + -+ /* An instruction that sets NEW_REG to the correct value. The first -+ move associated with DEF will have an rhs of OLD_REG; later moves -+ use the result of the previous move. */ -+ rtx insn; -+}; ++/* Add INSN to the model worklist immediately after PREV. Add it to the ++ beginning of the list if PREV is null. */ + -+typedef struct ps_reg_move_info ps_reg_move_info; -+DEF_VEC_O (ps_reg_move_info); -+DEF_VEC_ALLOC_O (ps_reg_move_info, heap); ++static void ++model_add_to_worklist_at (struct model_insn_info *insn, ++ struct model_insn_info *prev) ++{ ++ gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_NOWHERE); ++ QUEUE_INDEX (insn->insn) = QUEUE_READY; + - /* Holds the partial schedule as an array of II rows. Each entry of the - array points to a linked list of PS_INSNs, which represents the - instructions that are scheduled for that row. */ -@@ -148,6 +177,16 @@ - /* rows[i] points to linked list of insns scheduled in row i (0<=inum_nodes. 
*/ -+ VEC (ps_reg_move_info, heap) *reg_moves; ++ insn->prev = prev; ++ if (prev) ++ { ++ insn->next = prev->next; ++ prev->next = insn; ++ } ++ else ++ { ++ insn->next = model_worklist; ++ model_worklist = insn; ++ } ++ if (insn->next) ++ insn->next->prev = insn; ++} + -+ /* rows_length[i] holds the number of instructions in the row. -+ It is used only (as an optimization) to back off quickly from -+ trying to schedule a node in a full row; that is, to avoid running -+ through futile DFA state transitions. */ -+ int *rows_length; -+ - /* The earliest absolute cycle of an insn in the partial schedule. */ - int min_cycle; - -@@ -155,29 +194,18 @@ - int max_cycle; - - ddg_ptr g; /* The DDG of the insns in the partial schedule. */ --}; - --/* We use this to record all the register replacements we do in -- the kernel so we can undo SMS if it is not profitable. */ --struct undo_replace_buff_elem --{ -- rtx insn; -- rtx orig_reg; -- rtx new_reg; -- struct undo_replace_buff_elem *next; -+ int stage_count; /* The stage count of the partial schedule. 
*/ - }; - - -- - static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); - static void free_partial_schedule (partial_schedule_ptr); - static void reset_partial_schedule (partial_schedule_ptr, int new_ii); - void print_partial_schedule (partial_schedule_ptr, FILE *); - static void verify_partial_schedule (partial_schedule_ptr, sbitmap); - static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, -- ddg_node_ptr node, int cycle, -- sbitmap must_precede, -- sbitmap must_follow); -+ int, int, sbitmap, sbitmap); - static void rotate_partial_schedule (partial_schedule_ptr, int); - void set_row_column_for_ps (partial_schedule_ptr); - static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); -@@ -193,34 +221,27 @@ - static void permute_partial_schedule (partial_schedule_ptr, rtx); - static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, - rtx, rtx); --static void duplicate_insns_of_cycles (partial_schedule_ptr, -- int, int, int, rtx); -- --#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) --#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) --#define SCHED_FIRST_REG_MOVE(x) \ -- (((node_sched_params_ptr)(x)->aux.info)->first_reg_move) --#define SCHED_NREG_MOVES(x) \ -- (((node_sched_params_ptr)(x)->aux.info)->nreg_moves) --#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row) --#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage) --#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column) -+static int calculate_stage_count (partial_schedule_ptr, int); -+static void calculate_must_precede_follow (ddg_node_ptr, int, int, -+ int, int, sbitmap, sbitmap, sbitmap); -+static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, -+ sbitmap, int, int *, int *, int *); -+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, -+ sbitmap, int *, sbitmap, sbitmap); -+static void remove_node_from_ps 
(partial_schedule_ptr, ps_insn_ptr); -+ -+#define NODE_ASAP(node) ((node)->aux.count) ++/* Remove INSN from the model worklist. */ + -+#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x) -+#define SCHED_TIME(x) (SCHED_PARAMS (x)->time) -+#define SCHED_ROW(x) (SCHED_PARAMS (x)->row) -+#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) -+#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) - - /* The scheduling parameters held for each node. */ - typedef struct node_sched_params - { -- int asap; /* A lower-bound on the absolute scheduling cycle. */ -- int time; /* The absolute scheduling cycle (time >= asap). */ -- -- /* The following field (first_reg_move) is a pointer to the first -- register-move instruction added to handle the modulo-variable-expansion -- of the register defined by this node. This register-move copies the -- original register defined by the node. */ -- rtx first_reg_move; -- -- /* The number of register-move instructions added, immediately preceding -- first_reg_move. */ -- int nreg_moves; -+ int time; /* The absolute scheduling cycle. */ - - int row; /* Holds time % ii. */ - int stage; /* Holds time / ii. */ -@@ -230,6 +251,9 @@ - int column; - } *node_sched_params_ptr; - -+typedef struct node_sched_params node_sched_params; -+DEF_VEC_O (node_sched_params); -+DEF_VEC_ALLOC_O (node_sched_params, heap); - - /* The following three functions are copied from the current scheduler - code in order to use sched_analyze() for computing the dependencies. -@@ -279,6 +303,49 @@ - 0 - }; - -+/* Partial schedule instruction ID in PS is a register move. Return -+ information about it. 
*/ -+static struct ps_reg_move_info * -+ps_reg_move (partial_schedule_ptr ps, int id) ++static void ++model_remove_from_worklist (struct model_insn_info *insn) +{ -+ gcc_checking_assert (id >= ps->g->num_nodes); -+ return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes); ++ gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_READY); ++ QUEUE_INDEX (insn->insn) = QUEUE_NOWHERE; ++ ++ if (insn->prev) ++ insn->prev->next = insn->next; ++ else ++ model_worklist = insn->next; ++ if (insn->next) ++ insn->next->prev = insn->prev; +} + -+/* Return the rtl instruction that is being scheduled by partial schedule -+ instruction ID, which belongs to schedule PS. */ -+static rtx -+ps_rtl_insn (partial_schedule_ptr ps, int id) -+{ -+ if (id < ps->g->num_nodes) -+ return ps->g->nodes[id].insn; ++/* Add INSN to the model worklist. Start looking for a suitable position ++ between neighbors PREV and NEXT, testing at most MAX_SCHED_READY_INSNS ++ insns either side. A null PREV indicates the beginning of the list and ++ a null NEXT indicates the end. */ ++ ++static void ++model_add_to_worklist (struct model_insn_info *insn, ++ struct model_insn_info *prev, ++ struct model_insn_info *next) ++{ ++ int count; ++ ++ count = MAX_SCHED_READY_INSNS; ++ if (count > 0 && prev && model_order_p (insn, prev)) ++ do ++ { ++ count--; ++ prev = prev->prev; ++ } ++ while (count > 0 && prev && model_order_p (insn, prev)); + else -+ return ps_reg_move (ps, id)->insn; ++ while (count > 0 && next && model_order_p (next, insn)) ++ { ++ count--; ++ prev = next; ++ next = next->next; ++ } ++ model_add_to_worklist_at (insn, prev); +} + -+/* Partial schedule instruction ID, which belongs to PS, occured in -+ the original (unscheduled) loop. Return the first instruction -+ in the loop that was associated with ps_rtl_insn (PS, ID). -+ If the instruction had some notes before it, this is the first -+ of those notes. 
*/ -+static rtx -+ps_first_note (partial_schedule_ptr ps, int id) ++/* INSN may now have a higher priority (in the model_order_p sense) ++ than before. Move it up the worklist if necessary. */ ++ ++static void ++model_promote_insn (struct model_insn_info *insn) +{ -+ gcc_assert (id < ps->g->num_nodes); -+ return ps->g->nodes[id].first_note; ++ struct model_insn_info *prev; ++ int count; ++ ++ prev = insn->prev; ++ count = MAX_SCHED_READY_INSNS; ++ while (count > 0 && prev && model_order_p (insn, prev)) ++ { ++ count--; ++ prev = prev->prev; ++ } ++ if (prev != insn->prev) ++ { ++ model_remove_from_worklist (insn); ++ model_add_to_worklist_at (insn, prev); ++ } +} + -+/* Return the number of consecutive stages that are occupied by -+ partial schedule instruction ID in PS. */ -+static int -+ps_num_consecutive_stages (partial_schedule_ptr ps, int id) ++/* Add INSN to the end of the model schedule. */ ++ ++static void ++model_add_to_schedule (rtx insn) +{ -+ if (id < ps->g->num_nodes) -+ return 1; -+ else -+ return ps_reg_move (ps, id)->num_consecutive_stages; ++ unsigned int point; ++ ++ gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE); ++ QUEUE_INDEX (insn) = QUEUE_SCHEDULED; ++ ++ point = VEC_length (rtx, model_schedule); ++ VEC_quick_push (rtx, model_schedule, insn); ++ INSN_MODEL_INDEX (insn) = point + 1; +} + - /* Given HEAD and TAIL which are the first and last insns in a loop; - return the register which controls the loop. Return zero if it has - more than one occurrence in the loop besides the control part or the -@@ -310,10 +377,10 @@ - either a single (parallel) branch-on-count or a (non-parallel) - branch immediately preceded by a single (decrement) insn. */ - first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? 
tail -- : PREV_INSN (tail)); -+ : prev_nondebug_insn (tail)); - - for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) -- if (reg_mentioned_p (reg, insn)) -+ if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn)) - { - if (dump_file) - { -@@ -379,35 +446,59 @@ - } - - --/* Points to the array that contains the sched data for each node. */ --static node_sched_params_ptr node_sched_params; -+/* A vector that contains the sched data for each ps_insn. */ -+static VEC (node_sched_params, heap) *node_sched_param_vec; - --/* Allocate sched_params for each node and initialize it. Assumes that -- the aux field of each node contain the asap bound (computed earlier), -- and copies it into the sched_params field. */ -+/* Allocate sched_params for each node and initialize it. */ - static void - set_node_sched_params (ddg_ptr g) - { -- int i; -+ VEC_truncate (node_sched_params, node_sched_param_vec, 0); -+ VEC_safe_grow_cleared (node_sched_params, heap, -+ node_sched_param_vec, g->num_nodes); ++/* Analyze the instructions that are to be scheduled, setting up ++ MODEL_INSN_INFO (...) and model_num_insns accordingly. Add ready ++ instructions to model_worklist. 
*/ ++ ++static void ++model_analyze_insns (void) ++{ ++ rtx start, end, iter; ++ sd_iterator_def sd_it; ++ dep_t dep; ++ struct model_insn_info *insn, *con; ++ ++ model_num_insns = 0; ++ start = PREV_INSN (current_sched_info->next_tail); ++ end = current_sched_info->prev_head; ++ for (iter = start; iter != end; iter = PREV_INSN (iter)) ++ if (NONDEBUG_INSN_P (iter)) ++ { ++ insn = MODEL_INSN_INFO (iter); ++ insn->insn = iter; ++ FOR_EACH_DEP (iter, SD_LIST_FORW, sd_it, dep) ++ { ++ con = MODEL_INSN_INFO (DEP_CON (dep)); ++ if (con->insn && insn->alap < con->alap + 1) ++ insn->alap = con->alap + 1; ++ } ++ ++ insn->old_queue = QUEUE_INDEX (iter); ++ QUEUE_INDEX (iter) = QUEUE_NOWHERE; ++ ++ insn->unscheduled_preds = dep_list_size (iter, SD_LIST_HARD_BACK); ++ if (insn->unscheduled_preds == 0) ++ model_add_to_worklist (insn, NULL, model_worklist); ++ ++ model_num_insns++; ++ } +} - -- /* Allocate for each node in the DDG a place to hold the "sched_data". */ -- /* Initialize ASAP/ALAP/HIGHT to zero. */ -- node_sched_params = (node_sched_params_ptr) -- xcalloc (g->num_nodes, -- sizeof (struct node_sched_params)); -+/* Make sure that node_sched_param_vec has an entry for every move in PS. */ ++ ++/* The global state describes the register pressure at the start of the ++ model schedule. Initialize GROUP accordingly. */ ++ +static void -+extend_node_sched_params (partial_schedule_ptr ps) ++model_init_pressure_group (struct model_pressure_group *group) +{ -+ VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec, -+ ps->g->num_nodes + VEC_length (ps_reg_move_info, -+ ps->reg_moves)); ++ int cci, cl; ++ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ group->limits[cci].pressure = curr_reg_pressure[cl]; ++ group->limits[cci].point = 0; ++ } ++ /* Use index model_num_insns to record the state after the last ++ instruction in the model schedule. 
*/ ++ group->model = XNEWVEC (struct model_pressure_data, ++ (model_num_insns + 1) * ira_reg_class_cover_size); +} - -- /* Set the pointer of the general data of the node to point to the -- appropriate sched_params structure. */ -- for (i = 0; i < g->num_nodes; i++) -+/* Update the sched_params (time, row and stage) for node U using the II, -+ the CYCLE of U and MIN_CYCLE. -+ We're not simply taking the following -+ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); -+ because the stages may not be aligned on cycle 0. */ ++ ++/* Record that MODEL_REF_PRESSURE (GROUP, POINT, CCI) is PRESSURE. ++ Update the maximum pressure for the whole schedule. */ ++ +static void -+update_node_sched_params (int u, int ii, int cycle, int min_cycle) ++model_record_pressure (struct model_pressure_group *group, ++ int point, int cci, int pressure) +{ -+ int sc_until_cycle_zero; -+ int stage; ++ MODEL_REF_PRESSURE (group, point, cci) = pressure; ++ if (group->limits[cci].pressure < pressure) ++ { ++ group->limits[cci].pressure = pressure; ++ group->limits[cci].point = point; ++ } ++} + -+ SCHED_TIME (u) = cycle; -+ SCHED_ROW (u) = SMODULO (cycle, ii); ++/* INSN has just been added to the end of the model schedule. Record its ++ register-pressure information. */ + -+ /* The calculation of stage count is done adding the number -+ of stages before cycle zero and after cycle zero. 
*/ -+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); ++static void ++model_record_pressures (struct model_insn_info *insn) ++{ ++ struct reg_pressure_data *reg_pressure; ++ int point, cci, cl, delta; ++ int death[N_REG_CLASSES]; + -+ if (SCHED_TIME (u) < 0) ++ point = model_index (insn->insn); ++ if (sched_verbose >= 2) + { -+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); -+ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ char buf[2048]; ++ ++ if (point == 0) ++ { ++ fprintf (sched_dump, "\n;;\tModel schedule:\n;;\n"); ++ fprintf (sched_dump, ";;\t| idx insn | mpri hght dpth prio |\n"); ++ } ++ print_pattern (buf, PATTERN (insn->insn), 0); ++ fprintf (sched_dump, ";;\t| %3d %4d | %4d %4d %4d %4d | %-30s ", ++ point, INSN_UID (insn->insn), insn->model_priority, ++ insn->depth + insn->alap, insn->depth, ++ INSN_PRIORITY (insn->insn), buf); ++ } ++ calculate_reg_deaths (insn->insn, death); ++ reg_pressure = INSN_REG_PRESSURE (insn->insn); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ cl = ira_reg_class_cover[cci]; ++ delta = reg_pressure[cci].set_increase - death[cl]; ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, " %s:[%d,%+d]", reg_class_names[cl], ++ curr_reg_pressure[cl], delta); ++ model_record_pressure (&model_before_pressure, point, cci, ++ curr_reg_pressure[cl]); + } -+ else - { -- /* Watch out for aliasing problems? 
*/ -- node_sched_params[i].asap = g->nodes[i].aux.count; -- g->nodes[i].aux.info = &node_sched_params[i]; -+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); -+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; - } - } - - static void --print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) -+print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) - { - int i; - -@@ -415,22 +506,170 @@ - return; - for (i = 0; i < num_nodes; i++) - { -- node_sched_params_ptr nsp = &node_sched_params[i]; -- rtx reg_move = nsp->first_reg_move; -- int j; -+ node_sched_params_ptr nsp = SCHED_PARAMS (i); ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, "\n"); ++} ++ ++/* All instructions have been added to the model schedule. Record the ++ final register pressure in GROUP and set up all MODEL_MAX_PRESSUREs. */ - fprintf (file, "Node = %d; INSN = %d\n", i, -- (INSN_UID (g->nodes[i].insn))); -- fprintf (file, " asap = %d:\n", nsp->asap); -+ INSN_UID (ps_rtl_insn (ps, i))); -+ fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i])); - fprintf (file, " time = %d:\n", nsp->time); -- fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); -- for (j = 0; j < nsp->nreg_moves; j++) -+ fprintf (file, " stage = %d:\n", nsp->stage); ++static void ++model_record_final_pressures (struct model_pressure_group *group) ++{ ++ int point, cci, max_pressure, ref_pressure, cl; ++ ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) ++ { ++ /* Record the final pressure for this class. */ ++ cl = ira_reg_class_cover[cci]; ++ point = model_num_insns; ++ ref_pressure = curr_reg_pressure[cl]; ++ model_record_pressure (group, point, cci, ref_pressure); ++ ++ /* Record the original maximum pressure. */ ++ group->limits[cci].orig_pressure = group->limits[cci].pressure; ++ ++ /* Update the MODEL_MAX_PRESSURE for every point of the schedule. 
*/ ++ max_pressure = ref_pressure; ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ while (point > 0) ++ { ++ point--; ++ ref_pressure = MODEL_REF_PRESSURE (group, point, cci); ++ max_pressure = MAX (max_pressure, ref_pressure); ++ MODEL_MAX_PRESSURE (group, point, cci) = max_pressure; ++ } + } +} + -+/* Set SCHED_COLUMN for each instruction in row ROW of PS. */ ++/* Update all successors of INSN, given that INSN has just been scheduled. */ ++ +static void -+set_columns_for_row (partial_schedule_ptr ps, int row) ++model_add_successors_to_worklist (struct model_insn_info *insn) +{ -+ ps_insn_ptr cur_insn; -+ int column; ++ sd_iterator_def sd_it; ++ struct model_insn_info *con; ++ dep_t dep; + -+ column = 0; -+ for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) -+ SCHED_COLUMN (cur_insn->id) = column++; ++ FOR_EACH_DEP (insn->insn, SD_LIST_FORW, sd_it, dep) ++ { ++ con = MODEL_INSN_INFO (DEP_CON (dep)); ++ /* Ignore debug instructions, and instructions from other blocks. */ ++ if (con->insn) ++ { ++ con->unscheduled_preds--; ++ ++ /* Update the depth field of each true-dependent successor. ++ Increasing the depth gives them a higher priority than ++ before. */ ++ if (DEP_TYPE (dep) == REG_DEP_TRUE && con->depth < insn->depth + 1) ++ { ++ con->depth = insn->depth + 1; ++ if (QUEUE_INDEX (con->insn) == QUEUE_READY) ++ model_promote_insn (con); ++ } ++ ++ /* If this is a true dependency, or if there are no remaining ++ dependencies for CON (meaning that CON only had non-true ++ dependencies), make sure that CON is on the worklist. ++ We don't bother otherwise because it would tend to fill the ++ worklist with a lot of low-priority instructions that are not ++ yet ready to issue. */ ++ if ((con->depth > 0 || con->unscheduled_preds == 0) ++ && QUEUE_INDEX (con->insn) == QUEUE_NOWHERE) ++ model_add_to_worklist (con, insn, insn->next); ++ } ++ } +} + -+/* Set SCHED_COLUMN for each instruction in PS. 
*/ ++/* Give INSN a higher priority than any current instruction, then give ++ unscheduled predecessors of INSN a higher priority still. If any of ++ those predecessors are not on the model worklist, do the same for its ++ predecessors, and so on. */ ++ +static void -+set_columns_for_ps (partial_schedule_ptr ps) ++model_promote_predecessors (struct model_insn_info *insn) +{ -+ int row; ++ struct model_insn_info *pro, *first; ++ sd_iterator_def sd_it; ++ dep_t dep; ++ ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, ";;\t+--- priority of %d = %d, priority of", ++ INSN_UID (insn->insn), model_next_priority); ++ insn->model_priority = model_next_priority++; ++ model_remove_from_worklist (insn); ++ model_add_to_worklist_at (insn, NULL); + -+ for (row = 0; row < ps->ii; row++) -+ set_columns_for_row (ps, row); ++ first = NULL; ++ for (;;) ++ { ++ FOR_EACH_DEP (insn->insn, SD_LIST_HARD_BACK, sd_it, dep) ++ { ++ pro = MODEL_INSN_INFO (DEP_PRO (dep)); ++ /* The first test is to ignore debug instructions, and instructions ++ from other blocks. */ ++ if (pro->insn ++ && pro->model_priority != model_next_priority ++ && QUEUE_INDEX (pro->insn) != QUEUE_SCHEDULED) ++ { ++ pro->model_priority = model_next_priority; ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, " %d", INSN_UID (pro->insn)); ++ if (QUEUE_INDEX (pro->insn) == QUEUE_READY) ++ { ++ /* PRO is already in the worklist, but it now has ++ a higher priority than before. Move it at the ++ appropriate place. */ ++ model_remove_from_worklist (pro); ++ model_add_to_worklist (pro, NULL, model_worklist); ++ } ++ else ++ { ++ /* PRO isn't in the worklist. Recursively process ++ its predecessors until we find one that is. 
*/ ++ pro->next = first; ++ first = pro; ++ } ++ } ++ } ++ if (!first) ++ break; ++ insn = first; ++ first = insn->next; ++ } ++ if (sched_verbose >= 7) ++ fprintf (sched_dump, " = %d\n", model_next_priority); ++ model_next_priority++; +} + -+/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS. -+ Its single predecessor has already been scheduled, as has its -+ ddg node successors. (The move may have also another move as its -+ successor, in which case that successor will be scheduled later.) ++/* Pick one instruction from model_worklist and process it. */ + -+ The move is part of a chain that satisfies register dependencies -+ between a producing ddg node and various consuming ddg nodes. -+ If some of these dependencies have a distance of 1 (meaning that -+ the use is upward-exposoed) then DISTANCE1_USES is nonnull and -+ contains the set of uses with distance-1 dependencies. -+ DISTANCE1_USES is null otherwise. ++static void ++model_choose_insn (void) ++{ ++ struct model_insn_info *insn, *fallback; ++ int count; + -+ MUST_FOLLOW is a scratch bitmap that is big enough to hold -+ all current ps_insn ids. ++ if (sched_verbose >= 7) ++ { ++ fprintf (sched_dump, ";;\t+--- worklist:\n"); ++ insn = model_worklist; ++ count = MAX_SCHED_READY_INSNS; ++ while (count > 0 && insn) ++ { ++ fprintf (sched_dump, ";;\t+--- %d [%d, %d, %d, %d]\n", ++ INSN_UID (insn->insn), insn->model_priority, ++ insn->depth + insn->alap, insn->depth, ++ INSN_PRIORITY (insn->insn)); ++ count--; ++ insn = insn->next; ++ } ++ } + -+ Return true on success. */ -+static bool -+schedule_reg_move (partial_schedule_ptr ps, int i_reg_move, -+ sbitmap distance1_uses, sbitmap must_follow) ++ /* Look for a ready instruction whose model_classify_priority is zero ++ or negative, picking the highest-priority one. Adding such an ++ instruction to the schedule now should do no harm, and may actually ++ do some good. 
++ ++ Failing that, see whether there is an instruction with the highest ++ extant model_priority that is not yet ready, but which would reduce ++ pressure if it became ready. This is designed to catch cases like: ++ ++ (set (mem (reg R1)) (reg R2)) ++ ++ where the instruction is the last remaining use of R1 and where the ++ value of R2 is not yet available (or vice versa). The death of R1 ++ means that this instruction already reduces pressure. It is of ++ course possible that the computation of R2 involves other registers ++ that are hard to kill, but such cases are rare enough for this ++ heuristic to be a win in general. ++ ++ Failing that, just pick the highest-priority instruction in the ++ worklist. */ ++ count = MAX_SCHED_READY_INSNS; ++ insn = model_worklist; ++ fallback = 0; ++ for (;;) ++ { ++ if (count == 0 || !insn) ++ { ++ insn = fallback ? fallback : model_worklist; ++ break; ++ } ++ if (insn->unscheduled_preds) ++ { ++ if (model_worklist->model_priority == insn->model_priority ++ && !fallback ++ && model_classify_pressure (insn) < 0) ++ fallback = insn; ++ } ++ else ++ { ++ if (model_classify_pressure (insn) <= 0) ++ break; ++ } ++ count--; ++ insn = insn->next; ++ } ++ ++ if (sched_verbose >= 7 && insn != model_worklist) ++ { ++ if (insn->unscheduled_preds) ++ fprintf (sched_dump, ";;\t+--- promoting insn %d, with dependencies\n", ++ INSN_UID (insn->insn)); ++ else ++ fprintf (sched_dump, ";;\t+--- promoting insn %d, which is ready\n", ++ INSN_UID (insn->insn)); ++ } ++ if (insn->unscheduled_preds) ++ /* INSN isn't yet ready to issue. Give all its predecessors the ++ highest priority. */ ++ model_promote_predecessors (insn); ++ else ++ { ++ /* INSN is ready. Add it to the end of model_schedule and ++ process its successors. 
*/ ++ model_add_successors_to_worklist (insn); ++ model_remove_from_worklist (insn); ++ model_add_to_schedule (insn->insn); ++ model_record_pressures (insn); ++ update_register_pressure (insn->insn); ++ } ++} ++ ++/* Restore all QUEUE_INDEXs to the values that they had before ++ model_start_schedule was called. */ ++ ++static void ++model_reset_queue_indices (void) +{ -+ unsigned int u; -+ int this_time, this_distance, this_start, this_end, this_latency; -+ int start, end, c, ii; -+ sbitmap_iterator sbi; -+ ps_reg_move_info *move; -+ rtx this_insn; -+ ps_insn_ptr psi; ++ unsigned int i; ++ rtx insn; + -+ move = ps_reg_move (ps, i_reg_move); -+ ii = ps->ii; -+ if (dump_file) ++ FOR_EACH_VEC_ELT (rtx, model_schedule, i, insn) ++ QUEUE_INDEX (insn) = MODEL_INSN_INFO (insn)->old_queue; ++} ++ ++/* We have calculated the model schedule and spill costs. Print a summary ++ to sched_dump. */ ++ ++static void ++model_dump_pressure_summary (void) ++{ ++ int cci, cl; ++ ++ fprintf (sched_dump, ";; Pressure summary:"); ++ for (cci = 0; cci < ira_reg_class_cover_size; cci++) + { -+ fprintf (dump_file, "Scheduling register move INSN %d; ii = %d" -+ ", min cycle = %d\n\n", INSN_UID (move->insn), ii, -+ PS_MIN_CYCLE (ps)); -+ print_rtl_single (dump_file, move->insn); -+ fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time"); -+ fprintf (dump_file, "=========== =========== =====\n"); ++ cl = ira_reg_class_cover[cci]; ++ fprintf (sched_dump, " %s:%d", reg_class_names[cl], ++ model_before_pressure.limits[cci].pressure); + } ++ fprintf (sched_dump, "\n\n"); ++} + -+ start = INT_MIN; -+ end = INT_MAX; ++/* Initialize the SCHED_PRESSURE_MODEL information for the current ++ scheduling region. */ + -+ /* For dependencies of distance 1 between a producer ddg node A -+ and consumer ddg node B, we have a chain of dependencies: ++static void ++model_start_schedule (void) ++{ ++ basic_block bb; + -+ A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... 
--(T,Ln,0)--> B ++ model_next_priority = 1; ++ model_schedule = VEC_alloc (rtx, heap, sched_max_luid); ++ model_insns = XCNEWVEC (struct model_insn_info, sched_max_luid); + -+ where Mi is the ith move. For dependencies of distance 0 between -+ a producer ddg node A and consumer ddg node C, we have a chain of -+ dependencies: ++ bb = BLOCK_FOR_INSN (NEXT_INSN (current_sched_info->prev_head)); ++ initiate_reg_pressure_info (df_get_live_in (bb)); + -+ A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C ++ model_analyze_insns (); ++ model_init_pressure_group (&model_before_pressure); ++ while (model_worklist) ++ model_choose_insn (); ++ gcc_assert (model_num_insns == (int) VEC_length (rtx, model_schedule)); ++ if (sched_verbose >= 2) ++ fprintf (sched_dump, "\n"); + -+ where Mi' occupies the same position as Mi but occurs a stage later. -+ We can only schedule each move once, so if we have both types of -+ chain, we model the second as: ++ model_record_final_pressures (&model_before_pressure); ++ model_reset_queue_indices (); + -+ A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C ++ XDELETEVEC (model_insns); + -+ First handle the dependencies between the previously-scheduled -+ predecessor and the move. */ -+ this_insn = ps_rtl_insn (ps, move->def); -+ this_latency = insn_latency (this_insn, move->insn); -+ this_distance = distance1_uses && move->def < ps->g->num_nodes ? 
1 : 0; -+ this_time = SCHED_TIME (move->def) - this_distance * ii; -+ this_start = this_time + this_latency; -+ this_end = this_time + ii; -+ if (dump_file) -+ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", -+ this_start, this_end, SCHED_TIME (move->def), -+ INSN_UID (this_insn), this_latency, this_distance, -+ INSN_UID (move->insn)); ++ model_curr_point = 0; ++ initiate_reg_pressure_info (df_get_live_in (bb)); ++ if (sched_verbose >= 1) ++ model_dump_pressure_summary (); ++} + -+ if (start < this_start) -+ start = this_start; -+ if (end > this_end) -+ end = this_end; ++/* Free the information associated with GROUP. */ + -+ /* Handle the dependencies between the move and previously-scheduled -+ successors. */ -+ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi) -+ { -+ this_insn = ps_rtl_insn (ps, u); -+ this_latency = insn_latency (move->insn, this_insn); -+ if (distance1_uses && !TEST_BIT (distance1_uses, u)) -+ this_distance = -1; ++static void ++model_finalize_pressure_group (struct model_pressure_group *group) ++{ ++ XDELETEVEC (group->model); ++} ++ ++/* Free the information created by model_start_schedule. */ ++ ++static void ++model_end_schedule (void) ++{ ++ model_finalize_pressure_group (&model_before_pressure); ++ VEC_free (rtx, heap, model_schedule); ++} ++ + /* INSN is the "currently executing insn". Launch each insn which was + waiting on INSN. READY is the ready list which contains the insns + that are ready to fire. CLOCK is the current cycle. 
The function +@@ -1667,10 +3065,14 @@ + reg_class_names[ira_reg_class_cover[i]], + pressure_info[i].set_increase, pressure_info[i].change); + } ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ && model_index (insn) == model_curr_point) ++ fprintf (sched_dump, ":model %d", model_curr_point); + fputc ('\n', sched_dump); + } + +- if (sched_pressure_p && !DEBUG_INSN_P (insn)) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED && !DEBUG_INSN_P (insn)) + update_reg_and_insn_max_reg_pressure (insn); + + /* Scheduling instruction should have all its dependencies resolved and +@@ -1728,6 +3130,24 @@ + gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE); + QUEUE_INDEX (insn) = QUEUE_SCHEDULED; + ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ && NONDEBUG_INSN_P (insn)) ++ { ++ if (model_index (insn) == model_curr_point) ++ do ++ model_curr_point++; ++ while (model_curr_point < model_num_insns ++ && (QUEUE_INDEX (MODEL_INSN (model_curr_point)) ++ == QUEUE_SCHEDULED)); + else -+ this_distance = 0; -+ this_time = SCHED_TIME (u) + this_distance * ii; -+ this_start = this_time - ii; -+ this_end = this_time - this_latency; -+ if (dump_file) -+ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", -+ this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn), -+ this_latency, this_distance, INSN_UID (this_insn)); ++ model_recompute (insn); ++ model_update_limit_points (); ++ update_register_pressure (insn); ++ if (sched_verbose >= 2) ++ print_curr_reg_pressure (); ++ } ++ + gcc_assert (INSN_TICK (insn) >= MIN_TICK); + if (INSN_TICK (insn) > clock_var) + /* INSN has been prematurely moved from the queue to the ready list. +@@ -2056,7 +3476,16 @@ + /* If the ready list is full, delay the insn for 1 cycle. + See the comment in schedule_block for the rationale. 
*/ + if (!reload_completed +- && ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS ++ && (ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS ++ || (sched_pressure == SCHED_PRESSURE_MODEL ++ /* Limit pressure recalculations to MAX_SCHED_READY_INSNS ++ instructions too. */ ++ && model_index (insn) > (model_curr_point ++ + MAX_SCHED_READY_INSNS))) ++ && !(sched_pressure == SCHED_PRESSURE_MODEL ++ && model_curr_point < model_num_insns ++ /* Always allow the next model instruction to issue. */ ++ && model_index (insn) == model_curr_point) + && !SCHED_GROUP_P (insn) + && insn != skip_insn) + { +@@ -2293,12 +3722,12 @@ + fprintf (sched_dump, " %s:%d", + (*current_sched_info->print_insn) (p[i], 0), + INSN_LUID (p[i])); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + fprintf (sched_dump, "(cost=%d", + INSN_REG_PRESSURE_EXCESS_COST_CHANGE (p[i])); + if (INSN_TICK (p[i]) > clock_var) + fprintf (sched_dump, ":delay=%d", INSN_TICK (p[i]) - clock_var); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + fprintf (sched_dump, ")"); + } + fprintf (sched_dump, "\n"); +@@ -2609,8 +4038,8 @@ + { + if (state_dead_lock_p (state) + || insn_finishes_cycle_p (insn)) +- /* We won't issue any more instructions in the next +- choice_state. */ ++ /* We won't issue any more instructions in the next ++ choice_state. */ + top->rest = 0; + else + top->rest--; +@@ -2813,6 +4242,59 @@ + } + } + ++/* Examine all insns on the ready list and queue those which can't be ++ issued in this cycle. TEMP_STATE is temporary scheduler state we ++ can use as scratch space. If FIRST_CYCLE_INSN_P is true, no insns ++ have been issued for the current cycle, which means it is valid to ++ issue an asm statement. 
*/ + -+ if (start < this_start) -+ start = this_start; -+ if (end > this_end) -+ end = this_end; -+ } ++static void ++prune_ready_list (state_t temp_state, bool first_cycle_insn_p) ++{ ++ int i; + -+ if (dump_file) ++ restart: ++ for (i = 0; i < ready.n_ready; i++) + { -+ fprintf (dump_file, "----------- ----------- -----\n"); -+ fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)"); ++ rtx insn = ready_element (&ready, i); ++ int cost = 0; ++ ++ if (recog_memoized (insn) < 0) ++ { ++ if (!first_cycle_insn_p ++ && (GET_CODE (PATTERN (insn)) == ASM_INPUT ++ || asm_noperands (PATTERN (insn)) >= 0)) ++ cost = 1; ++ } ++ else if (sched_pressure != SCHED_PRESSURE_NONE) ++ { ++ if (sched_pressure == SCHED_PRESSURE_MODEL ++ && INSN_TICK (insn) <= clock_var) ++ { ++ memcpy (temp_state, curr_state, dfa_state_size); ++ if (state_transition (temp_state, insn) >= 0) ++ INSN_TICK (insn) = clock_var + 1; ++ } ++ cost = 0; ++ } ++ else ++ { ++ memcpy (temp_state, curr_state, dfa_state_size); ++ cost = state_transition (temp_state, insn); ++ if (cost < 0) ++ cost = 0; ++ else if (cost == 0) ++ cost = 1; ++ } ++ if (cost >= 1) ++ { ++ ready_remove (&ready, i); ++ queue_insn (insn, cost); ++ goto restart; ++ } + } ++} + -+ sbitmap_zero (must_follow); -+ SET_BIT (must_follow, move->def); + /* Use forward list scheduling to rearrange insns of block pointed to by + TARGET_BB, possibly bringing insns from subsequent blocks in the same + region. */ +@@ -2882,6 +4364,9 @@ + in try_ready () (which is called through init_ready_list ()). 
*/ + (*current_sched_info->init_ready_list) (); + ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ model_start_schedule (); + -+ start = MAX (start, end - (ii - 1)); -+ for (c = end; c >= start; c--) -+ { -+ psi = ps_add_node_check_conflicts (ps, i_reg_move, c, -+ move->uses, must_follow); -+ if (psi) - { -- fprintf (file, " reg_move = "); -- print_rtl_single (file, reg_move); -- reg_move = PREV_INSN (reg_move); -+ update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps)); -+ if (dump_file) -+ fprintf (dump_file, "\nScheduled register move INSN %d at" -+ " time %d, row %d\n\n", INSN_UID (move->insn), c, -+ SCHED_ROW (i_reg_move)); -+ return true; + /* The algorithm is O(n^2) in the number of ready insns at any given + time in the worst case. Before reload we are more likely to have + big lists so truncate them to a reasonable size. */ +@@ -2963,6 +4448,10 @@ } - } -+ -+ if (dump_file) -+ fprintf (dump_file, "\nNo available slot\n\n"); + while (advance > 0); + ++ prune_ready_list (temp_state, true); ++ if (ready.n_ready == 0) ++ continue; + -+ return false; - } + if (sort_p) + { + /* Sort the ready list based on priority. */ +@@ -3040,7 +4529,7 @@ + fprintf (sched_dump, ";;\tReady list (t = %3d): ", + clock_var); + debug_ready_list (&ready); +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + print_curr_reg_pressure (); + } - /* -@@ -444,175 +683,201 @@ - nreg_moves = ----------------------------------- + 1 - { dependence. - ii { 1 if not. 
- */ --static struct undo_replace_buff_elem * --generate_reg_moves (partial_schedule_ptr ps, bool rescan) -+static bool -+schedule_reg_moves (partial_schedule_ptr ps) - { - ddg_ptr g = ps->g; - int ii = ps->ii; - int i; -- struct undo_replace_buff_elem *reg_move_replaces = NULL; +@@ -3084,7 +4573,8 @@ + else + insn = ready_remove_first (&ready); - for (i = 0; i < g->num_nodes; i++) - { - ddg_node_ptr u = &g->nodes[i]; - ddg_edge_ptr e; - int nreg_moves = 0, i_reg_move; -- sbitmap *uses_of_defs; -- rtx last_reg_move; - rtx prev_reg, old_reg; -- -+ int first_move; -+ int distances[2]; -+ sbitmap must_follow; -+ sbitmap distance1_uses; -+ rtx set = single_set (u->insn); -+ -+ /* Skip instructions that do not set a register. */ -+ if ((set && !REG_P (SET_DEST (set)))) -+ continue; -+ - /* Compute the number of reg_moves needed for u, by looking at life - ranges started at u (excluding self-loops). */ -+ distances[0] = distances[1] = false; - for (e = u->out; e; e = e->next_out) - if (e->type == TRUE_DEP && e->dest != e->src) - { -- int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; -+ int nreg_moves4e = (SCHED_TIME (e->dest->cuid) -+ - SCHED_TIME (e->src->cuid)) / ii; +- if (sched_pressure_p && INSN_TICK (insn) > clock_var) ++ if (sched_pressure != SCHED_PRESSURE_NONE ++ && INSN_TICK (insn) > clock_var) + { + ready_add (&ready, insn, true); + advance = 1; +@@ -3112,44 +4602,6 @@ + } - if (e->distance == 1) -- nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; -+ nreg_moves4e = (SCHED_TIME (e->dest->cuid) -+ - SCHED_TIME (e->src->cuid) + ii) / ii; + sort_p = TRUE; +- memcpy (temp_state, curr_state, dfa_state_size); +- if (recog_memoized (insn) < 0) +- { +- asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT +- || asm_noperands (PATTERN (insn)) >= 0); +- if (!first_cycle_insn_p && asm_p) +- /* This is asm insn which is tried to be issued on the +- cycle not first. Issue it on the next cycle. 
*/ +- cost = 1; +- else +- /* A USE insn, or something else we don't need to +- understand. We can't pass these directly to +- state_transition because it will trigger a +- fatal error for unrecognizable insns. */ +- cost = 0; +- } +- else if (sched_pressure_p) +- cost = 0; +- else +- { +- cost = state_transition (temp_state, insn); +- if (cost < 0) +- cost = 0; +- else if (cost == 0) +- cost = 1; +- } +- +- if (cost >= 1) +- { +- queue_insn (insn, cost); +- if (SCHED_GROUP_P (insn)) +- { +- advance = cost; +- break; +- } +- +- continue; +- } - /* If dest precedes src in the schedule of the kernel, then dest - will read before src writes and we can save one reg_copy. */ -- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) -- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) -+ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) -+ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) - nreg_moves4e--; + if (current_sched_info->can_schedule_ready_p + && ! (*current_sched_info->can_schedule_ready_p) (insn)) +@@ -3200,11 +4652,17 @@ + reemit_notes (insn); + last_scheduled_insn = insn; -+ if (nreg_moves4e >= 1) -+ { -+ /* !single_set instructions are not supported yet and -+ thus we do not except to encounter them in the loop -+ except from the doloop part. For the latter case -+ we assume no regmoves are generated as the doloop -+ instructions are tied to the branch with an edge. */ -+ gcc_assert (set); -+ /* If the instruction contains auto-inc register then -+ validate that the regmov is being generated for the -+ target regsiter rather then the inc'ed register. 
*/ -+ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); -+ } -+ -+ if (nreg_moves4e) -+ { -+ gcc_assert (e->distance < 2); -+ distances[e->distance] = true; -+ } - nreg_moves = MAX (nreg_moves, nreg_moves4e); - } +- if (memcmp (curr_state, temp_state, dfa_state_size) != 0) +- { +- cycle_issued_insns++; +- memcpy (curr_state, temp_state, dfa_state_size); +- } ++ if (recog_memoized (insn) >= 0) ++ { ++ cost = state_transition (curr_state, insn); ++ if (sched_pressure != SCHED_PRESSURE_WEIGHTED) ++ gcc_assert (cost < 0); ++ cycle_issued_insns++; ++ asm_p = false; ++ } ++ else ++ asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT ++ || asm_noperands (PATTERN (insn)) >= 0); - if (nreg_moves == 0) - continue; + if (targetm.sched.variable_issue) + can_issue_more = +@@ -3225,6 +4683,9 @@ + + first_cycle_insn_p = false; + ++ if (ready.n_ready > 0) ++ prune_ready_list (temp_state, false); ++ + /* Sort the ready list based on priority. This must be + redone here, as schedule_insn may have readied additional + insns that will not be sorted correctly. */ +@@ -3321,6 +4782,9 @@ + } + } -+ /* Create NREG_MOVES register moves. */ -+ first_move = VEC_length (ps_reg_move_info, ps->reg_moves); -+ VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves, -+ first_move + nreg_moves); -+ extend_node_sched_params (ps); -+ -+ /* Record the moves associated with this node. */ -+ first_move += ps->g->num_nodes; ++ if (sched_pressure == SCHED_PRESSURE_MODEL) ++ model_end_schedule (); + -+ /* Generate each move. */ -+ old_reg = prev_reg = SET_DEST (single_set (u->insn)); -+ for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) + if (sched_verbose) + fprintf (sched_dump, ";; total time = %d\n", clock_var); + +@@ -3424,10 +4888,14 @@ + if (targetm.sched.dispatch (NULL_RTX, IS_DISPATCH_ON)) + targetm.sched.dispatch_do (NULL_RTX, DISPATCH_INIT); + +- sched_pressure_p = (flag_sched_pressure && ! 
reload_completed +- && common_sched_info->sched_pass_id == SCHED_RGN_PASS); ++ if (flag_sched_pressure ++ && !reload_completed ++ && common_sched_info->sched_pass_id == SCHED_RGN_PASS) ++ sched_pressure = flag_sched_pressure_algorithm; ++ else ++ sched_pressure = SCHED_PRESSURE_NONE; + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + ira_setup_eliminable_regset (); + + /* Initialize SPEC_INFO. */ +@@ -3504,7 +4972,7 @@ + if (targetm.sched.init_global) + targetm.sched.init_global (sched_dump, sched_verbose, get_max_uid () + 1); + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { + int i, max_regno = max_reg_num (); + +@@ -3517,8 +4985,11 @@ + ? ira_class_translate[REGNO_REG_CLASS (i)] + : reg_cover_class (i)); + curr_reg_live = BITMAP_ALLOC (NULL); +- saved_reg_live = BITMAP_ALLOC (NULL); +- region_ref_regs = BITMAP_ALLOC (NULL); ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + { -+ ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move); -+ -+ move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; -+ move->uses = sbitmap_alloc (first_move + nreg_moves); -+ move->old_reg = old_reg; -+ move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)); -+ move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; -+ move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); -+ sbitmap_zero (move->uses); -+ -+ prev_reg = move->new_reg; ++ saved_reg_live = BITMAP_ALLOC (NULL); ++ region_ref_regs = BITMAP_ALLOC (NULL); + } -+ -+ distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL; -+ - /* Every use of the register defined by node may require a different - copy of this register, depending on the time the use is scheduled. -- Set a bitmap vector, telling which nodes use each copy of this -- register. */ -- uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes); -- sbitmap_vector_zero (uses_of_defs, nreg_moves); -+ Record which uses require which move results. 
*/ - for (e = u->out; e; e = e->next_out) - if (e->type == TRUE_DEP && e->dest != e->src) - { -- int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; -+ int dest_copy = (SCHED_TIME (e->dest->cuid) -+ - SCHED_TIME (e->src->cuid)) / ii; + } - if (e->distance == 1) -- dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; -+ dest_copy = (SCHED_TIME (e->dest->cuid) -+ - SCHED_TIME (e->src->cuid) + ii) / ii; + curr_state = xmalloc (dfa_state_size); +@@ -3618,12 +5089,15 @@ + sched_finish (void) + { + haifa_finish_h_i_d (); +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { +- free (sched_regno_cover_class); +- BITMAP_FREE (region_ref_regs); +- BITMAP_FREE (saved_reg_live); ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) ++ { ++ BITMAP_FREE (region_ref_regs); ++ BITMAP_FREE (saved_reg_live); ++ } + BITMAP_FREE (curr_reg_live); ++ free (sched_regno_cover_class); + } + free (curr_state); -- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) -- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) -+ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) -+ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) - dest_copy--; +@@ -3939,7 +5413,7 @@ + INSN_TICK (next) = tick; - if (dest_copy) -- SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid); -- } -+ { -+ ps_reg_move_info *move; + delay = tick - clock_var; +- if (delay <= 0 || sched_pressure_p) ++ if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE) + delay = QUEUE_READY; -- /* Now generate the reg_moves, attaching relevant uses to them. */ -- SCHED_NREG_MOVES (u) = nreg_moves; -- old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn))); -- /* Insert the reg-moves right before the notes which precede -- the insn they relates to. 
*/ -- last_reg_move = u->first_note; -+ move = ps_reg_move (ps, first_move + dest_copy - 1); -+ SET_BIT (move->uses, e->dest->cuid); -+ if (e->distance == 1) -+ SET_BIT (distance1_uses, e->dest->cuid); -+ } -+ } + change_queue_index (next, delay); +@@ -5188,7 +6662,7 @@ + if (insn == jump) + break; -+ must_follow = sbitmap_alloc (first_move + nreg_moves); - for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) -- { -- unsigned int i_use = 0; -- rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg)); -- rtx reg_move = gen_move_insn (new_reg, prev_reg); -- sbitmap_iterator sbi; -- -- add_insn_before (reg_move, last_reg_move, NULL); -- last_reg_move = reg_move; -- -- if (!SCHED_FIRST_REG_MOVE (u)) -- SCHED_FIRST_REG_MOVE (u) = reg_move; -- -- EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi) -- { -- struct undo_replace_buff_elem *rep; -- -- rep = (struct undo_replace_buff_elem *) -- xcalloc (1, sizeof (struct undo_replace_buff_elem)); -- rep->insn = g->nodes[i_use].insn; -- rep->orig_reg = old_reg; -- rep->new_reg = new_reg; -- -- if (! reg_move_replaces) -- reg_move_replaces = rep; -- else -- { -- rep->next = reg_move_replaces; -- reg_move_replaces = rep; -- } -- -- replace_rtx (g->nodes[i_use].insn, old_reg, new_reg); -- if (rescan) -- df_insn_rescan (g->nodes[i_use].insn); -- } -- -- prev_reg = new_reg; -- } -- sbitmap_vector_free (uses_of_defs); -+ if (!schedule_reg_move (ps, first_move + i_reg_move, -+ distance1_uses, must_follow)) -+ break; -+ sbitmap_free (must_follow); -+ if (distance1_uses) -+ sbitmap_free (distance1_uses); -+ if (i_reg_move < nreg_moves) -+ return false; - } -- return reg_move_replaces; -+ return true; - } +- if (dep_list_size (insn) == 0) ++ if (dep_list_size (insn, SD_LIST_FORW) == 0) + { + dep_def _new_dep, *new_dep = &_new_dep; --/* Free memory allocated for the undo buffer. */ -+/* Emit the moves associatied with PS. Apply the substitutions -+ associated with them. 
*/ - static void --free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) -+apply_reg_moves (partial_schedule_ptr ps) - { -+ ps_reg_move_info *move; -+ int i; +@@ -5559,6 +7033,7 @@ -- while (reg_move_replaces) -+ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) + FOR_EACH_VEC_ELT (haifa_insn_data_def, h_i_d, i, data) { -- struct undo_replace_buff_elem *rep = reg_move_replaces; -+ unsigned int i_use; -+ sbitmap_iterator sbi; - -- reg_move_replaces = reg_move_replaces->next; -- free (rep); -+ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi) -+ { -+ replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); -+ df_insn_rescan (ps->g->nodes[i_use].insn); -+ } - } ++ free (data->max_reg_pressure); + if (data->reg_pressure != NULL) + free (data->reg_pressure); + for (use = data->reg_use_list; use != NULL; use = next) +--- a/src/gcc/hooks.c ++++ b/src/gcc/hooks.c +@@ -101,6 +101,15 @@ + return true; } --/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values -- of SCHED_ROW and SCHED_STAGE. */ -+/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of -+ SCHED_ROW and SCHED_STAGE. */ - static void --normalize_sched_times (partial_schedule_ptr ps) -+reset_sched_times (partial_schedule_ptr ps, int amount) - { - int row; -- int amount = PS_MIN_CYCLE (ps); - int ii = ps->ii; - ps_insn_ptr crr_insn; ++/* Generic hook that takes (enum machine_mode, unsigned HOST_WIDE_INT) ++ and returns false. */ ++bool ++hook_bool_mode_uhwi_false (enum machine_mode mode ATTRIBUTE_UNUSED, ++ unsigned HOST_WIDE_INT value ATTRIBUTE_UNUSED) ++{ ++ return false; ++} ++ + /* Generic hook that takes (FILE *, const char *) and does nothing. 
*/ + void + hook_void_FILEptr_constcharptr (FILE *a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED) +--- a/src/gcc/hooks.h ++++ b/src/gcc/hooks.h +@@ -34,6 +34,8 @@ + extern bool hook_bool_mode_true (enum machine_mode); + extern bool hook_bool_mode_const_rtx_false (enum machine_mode, const_rtx); + extern bool hook_bool_mode_const_rtx_true (enum machine_mode, const_rtx); ++extern bool hook_bool_mode_uhwi_false (enum machine_mode, ++ unsigned HOST_WIDE_INT); + extern bool hook_bool_tree_false (tree); + extern bool hook_bool_const_tree_false (const_tree); + extern bool hook_bool_tree_true (tree); +--- a/src/gcc/ifcvt.c ++++ b/src/gcc/ifcvt.c +@@ -1,5 +1,6 @@ + /* If-conversion support. +- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010 ++ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, ++ 2011 + Free Software Foundation, Inc. - for (row = 0; row < ii; row++) - for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) - { -- ddg_node_ptr u = crr_insn->node; -+ int u = crr_insn->id; - int normalized_time = SCHED_TIME (u) - amount; -+ int new_min_cycle = PS_MIN_CYCLE (ps) - amount; + This file is part of GCC. +@@ -304,6 +305,10 @@ -- if (dump_file) -- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\ -- min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME -- (u), ps->min_cycle); -+ if (dump_file) -+ { -+ /* Print the scheduling times after the rotation. */ -+ rtx insn = ps_rtl_insn (ps, u); + for (insn = start; ; insn = NEXT_INSN (insn)) + { ++ /* dwarf2out can't cope with conditional prologues. 
*/ ++ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END) ++ return FALSE; + -+ fprintf (dump_file, "crr_insn->node=%d (insn id %d), " -+ "crr_insn->cycle=%d, min_cycle=%d", u, -+ INSN_UID (insn), normalized_time, new_min_cycle); -+ if (JUMP_P (insn)) -+ fprintf (dump_file, " (branch)"); -+ fprintf (dump_file, "\n"); -+ } -+ - gcc_assert (SCHED_TIME (u) >= ps->min_cycle); - gcc_assert (SCHED_TIME (u) <= ps->max_cycle); -- SCHED_TIME (u) = normalized_time; -- SCHED_ROW (u) = normalized_time % ii; -- SCHED_STAGE (u) = normalized_time / ii; -- } --} -- --/* Set SCHED_COLUMN of each node according to its position in PS. */ --static void --set_columns_for_ps (partial_schedule_ptr ps) --{ -- int row; -- -- for (row = 0; row < ps->ii; row++) -- { -- ps_insn_ptr cur_insn = ps->rows[row]; -- int column = 0; - -- for (; cur_insn; cur_insn = cur_insn->next_in_row) -- SCHED_COLUMN (cur_insn->node) = column++; -- } -+ crr_insn->cycle = normalized_time; -+ update_node_sched_params (u, ii, normalized_time, new_min_cycle); -+ } - } -- -+ - /* Permute the insns according to their order in PS, from row 0 to - row ii-1, and position them right before LAST. This schedules - the insns of the loop kernel. */ -@@ -625,14 +890,220 @@ + if (NOTE_P (insn) || DEBUG_INSN_P (insn)) + goto insn_done; - for (row = 0; row < ii ; row++) - for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) -- if (PREV_INSN (last) != ps_ij->node->insn) -- reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, -- PREV_INSN (last)); -+ { -+ rtx insn = ps_rtl_insn (ps, ps_ij->id); +--- a/src/gcc/internal-fn.c ++++ b/src/gcc/internal-fn.c +@@ -0,0 +1,147 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. + -+ if (PREV_INSN (last) != insn) -+ { -+ if (ps_ij->id < ps->g->num_nodes) -+ reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn, -+ PREV_INSN (last)); -+ else -+ add_insn_before (insn, last, NULL); -+ } -+ } -+} ++This file is part of GCC. 
+ -+/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE -+ respectively only if cycle C falls on the border of the scheduling -+ window boundaries marked by START and END cycles. STEP is the -+ direction of the window. */ -+static inline void -+set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow, -+ sbitmap *tmp_precede, sbitmap must_precede, int c, -+ int start, int end, int step) -+{ -+ *tmp_precede = NULL; -+ *tmp_follow = NULL; ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. + -+ if (c == start) -+ { -+ if (step == 1) -+ *tmp_precede = must_precede; -+ else /* step == -1. */ -+ *tmp_follow = must_follow; -+ } -+ if (c == end - step) -+ { -+ if (step == 1) -+ *tmp_follow = must_follow; -+ else /* step == -1. */ -+ *tmp_precede = must_precede; -+ } ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. + -+} ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ + -+/* Return True if the branch can be moved to row ii-1 while -+ normalizing the partial schedule PS to start from cycle zero and thus -+ optimize the SC. Otherwise return False. */ -+static bool -+optimize_sc (partial_schedule_ptr ps, ddg_ptr g) ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "gimple.h" ++#include "tree.h" ++#include "expr.h" ++#include "optabs.h" ++#include "recog.h" ++ ++/* The names of each internal function, indexed by function number. 
*/ ++const char *const internal_fn_name_array[] = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) #CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ "" ++}; ++ ++/* The ECF_* flags of each internal function, indexed by function number. */ ++const int internal_fn_flags_array[] = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) FLAGS, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ 0 ++}; ++ ++/* ARRAY_TYPE is an array of vector modes. Return the associated insn ++ for load-lanes-style optab OPTAB. The insn must exist. */ ++ ++static enum insn_code ++get_multi_vector_move (tree array_type, convert_optab optab) +{ -+ int amount = PS_MIN_CYCLE (ps); -+ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes); -+ int start, end, step; -+ int ii = ps->ii; -+ bool ok = false; -+ int stage_count, stage_count_curr; ++ enum insn_code icode; ++ enum machine_mode imode; ++ enum machine_mode vmode; + -+ /* Compare the SC after normalization and SC after bringing the branch -+ to row ii-1. If they are equal just bail out. */ -+ stage_count = calculate_stage_count (ps, amount); -+ stage_count_curr = -+ calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1)); ++ gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE); ++ imode = TYPE_MODE (array_type); ++ vmode = TYPE_MODE (TREE_TYPE (array_type)); + -+ if (stage_count == stage_count_curr) -+ { -+ if (dump_file) -+ fprintf (dump_file, "SMS SC already optimized.\n"); ++ icode = convert_optab_handler (optab, imode, vmode); ++ gcc_assert (icode != CODE_FOR_nothing); ++ return icode; ++} + -+ ok = false; -+ goto clear; -+ } ++/* Expand LOAD_LANES call STMT. 
*/ + -+ if (dump_file) -+ { -+ fprintf (dump_file, "SMS Trying to optimize branch location\n"); -+ fprintf (dump_file, "SMS partial schedule before trial:\n"); -+ print_partial_schedule (ps, dump_file); -+ } ++static void ++expand_LOAD_LANES (gimple stmt) ++{ ++ tree type, lhs, rhs; ++ rtx target, mem; ++ enum insn_code icode; ++ const struct insn_operand_data *operand; + -+ /* First, normalize the partial scheduling. */ -+ reset_sched_times (ps, amount); -+ rotate_partial_schedule (ps, amount); -+ if (dump_file) -+ { -+ fprintf (dump_file, -+ "SMS partial schedule after normalization (ii, %d, SC %d):\n", -+ ii, stage_count); -+ print_partial_schedule (ps, dump_file); -+ } ++ lhs = gimple_call_lhs (stmt); ++ rhs = gimple_call_arg (stmt, 0); ++ type = TREE_TYPE (lhs); + -+ if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1) -+ { -+ ok = true; -+ goto clear; -+ } ++ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ mem = expand_normal (rhs); + -+ sbitmap_ones (sched_nodes); ++ gcc_assert (REG_P (target)); ++ gcc_assert (MEM_P (mem)); ++ PUT_MODE (mem, TYPE_MODE (type)); + -+ /* Calculate the new placement of the branch. It should be in row -+ ii-1 and fall into it's scheduling window. */ -+ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start, -+ &step, &end) == 0) -+ { -+ bool success; -+ ps_insn_ptr next_ps_i; -+ int branch_cycle = SCHED_TIME (g->closing_branch->cuid); -+ int row = SMODULO (branch_cycle, ps->ii); -+ int num_splits = 0; -+ sbitmap must_precede, must_follow, tmp_precede, tmp_follow; -+ int c; ++ icode = get_multi_vector_move (type, vec_load_lanes_optab); + -+ if (dump_file) -+ fprintf (dump_file, "\nTrying to schedule node %d " -+ "INSN = %d in (%d .. 
%d) step %d\n", -+ g->closing_branch->cuid, -+ (INSN_UID (g->closing_branch->insn)), start, end, step); ++ operand = &insn_data[(int) icode].operand[1]; ++ if (operand->predicate && !operand->predicate (mem, operand->mode)) ++ mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0))); + -+ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end)); -+ if (step == 1) -+ { -+ c = start + ii - SMODULO (start, ii) - 1; -+ gcc_assert (c >= start); -+ if (c >= end) -+ { -+ ok = false; -+ if (dump_file) -+ fprintf (dump_file, -+ "SMS failed to schedule branch at cycle: %d\n", c); -+ goto clear; -+ } -+ } -+ else -+ { -+ c = start - SMODULO (start, ii) - 1; -+ gcc_assert (c <= start); ++ emit_insn (GEN_FCN (icode) (target, mem)); ++} + -+ if (c <= end) -+ { -+ if (dump_file) -+ fprintf (dump_file, -+ "SMS failed to schedule branch at cycle: %d\n", c); -+ ok = false; -+ goto clear; -+ } -+ } ++/* Expand STORE_LANES call STMT. */ + -+ must_precede = sbitmap_alloc (g->num_nodes); -+ must_follow = sbitmap_alloc (g->num_nodes); ++static void ++expand_STORE_LANES (gimple stmt) ++{ ++ tree type, lhs, rhs; ++ rtx target, reg; ++ enum insn_code icode; ++ const struct insn_operand_data *operand; + -+ /* Try to schedule the branch is it's new cycle. */ -+ calculate_must_precede_follow (g->closing_branch, start, end, -+ step, ii, sched_nodes, -+ must_precede, must_follow); ++ lhs = gimple_call_lhs (stmt); ++ rhs = gimple_call_arg (stmt, 0); ++ type = TREE_TYPE (rhs); + -+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, -+ must_precede, c, start, end, step); ++ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ reg = expand_normal (rhs); + -+ /* Find the element in the partial schedule related to the closing -+ branch so we can remove it from it's current cycle. 
*/ -+ for (next_ps_i = ps->rows[row]; -+ next_ps_i; next_ps_i = next_ps_i->next_in_row) -+ if (next_ps_i->id == g->closing_branch->cuid) -+ break; ++ gcc_assert (MEM_P (target)); ++ PUT_MODE (target, TYPE_MODE (type)); + -+ remove_node_from_ps (ps, next_ps_i); -+ success = -+ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, -+ sched_nodes, &num_splits, -+ tmp_precede, tmp_follow); -+ gcc_assert (num_splits == 0); -+ if (!success) -+ { -+ if (dump_file) -+ fprintf (dump_file, -+ "SMS failed to schedule branch at cycle: %d, " -+ "bringing it back to cycle %d\n", c, branch_cycle); ++ icode = get_multi_vector_move (type, vec_store_lanes_optab); + -+ /* The branch was failed to be placed in row ii - 1. -+ Put it back in it's original place in the partial -+ schedualing. */ -+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, -+ must_precede, branch_cycle, start, end, -+ step); -+ success = -+ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, -+ branch_cycle, sched_nodes, -+ &num_splits, tmp_precede, -+ tmp_follow); -+ gcc_assert (success && (num_splits == 0)); -+ ok = false; -+ } -+ else -+ { -+ /* The branch is placed in row ii - 1. 
*/ -+ if (dump_file) -+ fprintf (dump_file, -+ "SMS success in moving branch to cycle %d\n", c); ++ operand = &insn_data[(int) icode].operand[0]; ++ if (operand->predicate && !operand->predicate (target, operand->mode)) ++ target = replace_equiv_address (target, ++ force_reg (Pmode, XEXP (target, 0))); + -+ update_node_sched_params (g->closing_branch->cuid, ii, c, -+ PS_MIN_CYCLE (ps)); -+ ok = true; -+ } ++ operand = &insn_data[(int) icode].operand[1]; ++ if (operand->predicate && !operand->predicate (reg, operand->mode)) ++ reg = force_reg (TYPE_MODE (type), reg); + -+ free (must_precede); -+ free (must_follow); -+ } ++ emit_insn (GEN_FCN (icode) (target, reg)); ++} + -+clear: -+ free (sched_nodes); -+ return ok; - } - - static void - duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, -- int to_stage, int for_prolog, rtx count_reg) -+ int to_stage, rtx count_reg) ++/* Routines to expand each internal function, indexed by function number. ++ Each routine has the prototype: ++ ++ expand_ (gimple stmt) ++ ++ where STMT is the statement that performs the call. */ ++static void (*const internal_fn_expanders[]) (gimple) = { ++#define DEF_INTERNAL_FN(CODE, FLAGS) expand_##CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ 0 ++}; ++ ++/* Expand STMT, which is a call to internal function FN. */ ++ ++void ++expand_internal_call (gimple stmt) ++{ ++ internal_fn_expanders[(int) gimple_call_internal_fn (stmt)] (stmt); ++} +--- a/src/gcc/internal-fn.def ++++ b/src/gcc/internal-fn.def +@@ -0,0 +1,42 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. 
++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* This file specifies a list of internal "functions". These functions ++ differ from built-in functions in that they have no linkage and cannot ++ be called directly by the user. They represent operations that are only ++ synthesised by GCC itself. ++ ++ Internal functions are used instead of tree codes if the operation ++ and its operands are more naturally represented as a GIMPLE_CALL ++ than a GIMPLE_ASSIGN. ++ ++ Each entry in this file has the form: ++ ++ DEF_INTERNAL_FN (NAME, FLAGS) ++ ++ where NAME is the name of the function and FLAGS is a set of ++ ECF_* flags. Each entry must have a corresponding expander ++ of the form: ++ ++ void expand_NAME (gimple stmt) ++ ++ where STMT is the statement that performs the call. */ ++ ++DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF) ++DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF) +--- a/src/gcc/internal-fn.h ++++ b/src/gcc/internal-fn.h +@@ -0,0 +1,52 @@ ++/* Internal functions. ++ Copyright (C) 2011 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#ifndef GCC_INTERNAL_FN_H ++#define GCC_INTERNAL_FN_H ++ ++enum internal_fn { ++#define DEF_INTERNAL_FN(CODE, FLAGS) IFN_##CODE, ++#include "internal-fn.def" ++#undef DEF_INTERNAL_FN ++ IFN_LAST ++}; ++ ++extern const char *const internal_fn_name_array[]; ++extern const int internal_fn_flags_array[]; ++ ++/* Return the name of internal function FN. The name is only meaningful ++ for dumps; it has no linkage. */ ++ ++static inline const char * ++internal_fn_name (enum internal_fn fn) ++{ ++ return internal_fn_name_array[(int) fn]; ++} ++ ++/* Return the ECF_* flags for function FN. */ ++ ++static inline int ++internal_fn_flags (enum internal_fn fn) ++{ ++ return internal_fn_flags_array[(int) fn]; ++} ++ ++extern void expand_internal_call (gimple); ++ ++#endif +--- a/src/gcc/ipa-prop.c ++++ b/src/gcc/ipa-prop.c +@@ -1417,6 +1417,8 @@ { - int row; - ps_insn_ptr ps_ij; -@@ -640,59 +1111,30 @@ - for (row = 0; row < ps->ii; row++) - for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) - { -- ddg_node_ptr u_node = ps_ij->node; -- int j, i_reg_moves; -- rtx reg_move = NULL_RTX; -+ int u = ps_ij->id; -+ int first_u, last_u; -+ rtx u_insn; + tree target = gimple_call_fn (call); - /* Do not duplicate any insn which refers to count_reg as it - belongs to the control part. -+ The closing branch is scheduled as well and thus should -+ be ignored. - TODO: This should be done by analyzing the control part of - the loop. 
*/ -- if (reg_mentioned_p (count_reg, u_node->insn)) -+ u_insn = ps_rtl_insn (ps, u); -+ if (reg_mentioned_p (count_reg, u_insn) -+ || JUMP_P (u_insn)) - continue; ++ if (!target) ++ return; + if (TREE_CODE (target) == SSA_NAME) + ipa_analyze_indirect_call_uses (node, info, parms_info, call, target); + else if (TREE_CODE (target) == OBJ_TYPE_REF) +--- a/src/gcc/LINARO-VERSION ++++ b/src/gcc/LINARO-VERSION +@@ -0,0 +1 @@ ++4.6-2013.04 +--- a/src/gcc/longlong.h ++++ b/src/gcc/longlong.h +@@ -203,7 +203,7 @@ + UDItype __umulsidi3 (USItype, USItype); + #endif -- if (for_prolog) -- { -- /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing -- number of reg_moves starting with the second occurrence of -- u_node, which is generated if its SCHED_STAGE <= to_stage. */ -- i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1; -- i_reg_moves = MAX (i_reg_moves, 0); -- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); -- -- /* The reg_moves start from the *first* reg_move backwards. */ -- if (i_reg_moves) -- { -- reg_move = SCHED_FIRST_REG_MOVE (u_node); -- for (j = 1; j < i_reg_moves; j++) -- reg_move = PREV_INSN (reg_move); -- } -- } -- else /* It's for the epilog. */ -+ first_u = SCHED_STAGE (u); -+ last_u = first_u + ps_num_consecutive_stages (ps, u) - 1; -+ if (from_stage <= last_u && to_stage >= first_u) - { -- /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves, -- starting to decrease one stage after u_node no longer occurs; -- that is, generate all reg_moves until -- SCHED_STAGE (u_node) == from_stage - 1. */ -- i_reg_moves = SCHED_NREG_MOVES (u_node) -- - (from_stage - SCHED_STAGE (u_node) - 1); -- i_reg_moves = MAX (i_reg_moves, 0); -- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); -- -- /* The reg_moves start from the *last* reg_move forwards. 
*/ -- if (i_reg_moves) -- { -- reg_move = SCHED_FIRST_REG_MOVE (u_node); -- for (j = 1; j < SCHED_NREG_MOVES (u_node); j++) -- reg_move = PREV_INSN (reg_move); -- } -+ if (u < ps->g->num_nodes) -+ duplicate_insn_chain (ps_first_note (ps, u), u_insn); -+ else -+ emit_insn (copy_rtx (PATTERN (u_insn))); - } -- -- for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move)) -- emit_insn (copy_rtx (PATTERN (reg_move))); -- if (SCHED_STAGE (u_node) >= from_stage -- && SCHED_STAGE (u_node) <= to_stage) -- duplicate_insn_chain (u_node->first_note, u_node->insn); - } - } +-#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32 ++#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) && W_TYPE_SIZE == 32 + #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \ + : "=r" ((USItype) (sh)), \ +@@ -220,9 +220,12 @@ + "rI" ((USItype) (bh)), \ + "r" ((USItype) (al)), \ + "rI" ((USItype) (bl)) __CLOBBER_CC) +-#define umul_ppmm(xh, xl, a, b) \ +-{register USItype __t0, __t1, __t2; \ +- __asm__ ("%@ Inlined umul_ppmm\n" \ ++# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \ ++ || defined(__ARM_ARCH_3__) ++# define umul_ppmm(xh, xl, a, b) \ ++ do { \ ++ register USItype __t0, __t1, __t2; \ ++ __asm__ ("%@ Inlined umul_ppmm\n" \ + " mov %2, %5, lsr #16\n" \ + " mov %0, %6, lsr #16\n" \ + " bic %3, %5, %2, lsl #16\n" \ +@@ -239,14 +242,26 @@ + "=r" ((USItype) (xl)), \ + "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ + : "r" ((USItype) (a)), \ +- "r" ((USItype) (b)) __CLOBBER_CC );} +-#define UMUL_TIME 20 +-#define UDIV_TIME 100 ++ "r" ((USItype) (b)) __CLOBBER_CC ); \ ++ } while (0) ++# define UMUL_TIME 20 ++# else ++# define umul_ppmm(xh, xl, a, b) \ ++ do { \ ++ /* Generate umull, under compiler control. 
*/ \ ++ register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \ ++ (xl) = (USItype)__t0; \ ++ (xh) = (USItype)(__t0 >> 32); \ ++ } while (0) ++# define UMUL_TIME 3 ++# endif ++# define UDIV_TIME 100 + #endif /* __arm__ */ + + #if defined(__arm__) + /* Let gcc decide how best to implement count_leading_zeros. */ + #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) ++#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) + #define COUNT_LEADING_ZEROS_0 32 + #endif -@@ -726,11 +1168,13 @@ - } +--- a/src/gcc/loop-doloop.c ++++ b/src/gcc/loop-doloop.c +@@ -78,6 +78,8 @@ + rtx inc_src; + rtx condition; + rtx pattern; ++ rtx cc_reg = NULL_RTX; ++ rtx reg_orig = NULL_RTX; - for (i = 0; i < last_stage; i++) -- duplicate_insns_of_cycles (ps, 0, i, 1, count_reg); -+ duplicate_insns_of_cycles (ps, 0, i, count_reg); + /* The canonical doloop pattern we expect has one of the following + forms: +@@ -96,7 +98,16 @@ + 2) (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) + (label_ref (label)) +- (pc))). */ ++ (pc))). ++ ++ Some targets (ARM) do the comparison before the branch, as in the ++ following form: ++ ++ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) ++ (set (reg) (plus (reg) (const_int -1)))]) ++ (set (pc) (if_then_else (cc == NE) ++ (label_ref (label)) ++ (pc))) */ - /* Put the prolog on the entry edge. */ - e = loop_preheader_edge (loop); - split_edge_and_insert (e, get_insns ()); -+ if (!flag_resched_modulo_sched) -+ e->dest->flags |= BB_DISABLE_SCHEDULE; + pattern = PATTERN (doloop_pat); - end_sequence (); +@@ -104,19 +115,47 @@ + { + rtx cond; + rtx prev_insn = prev_nondebug_insn (doloop_pat); ++ rtx cmp_arg1, cmp_arg2; ++ rtx cmp_orig; -@@ -738,15 +1182,30 @@ - start_sequence (); +- /* We expect the decrement to immediately precede the branch. 
*/ ++ /* In case the pattern is not PARALLEL we expect two forms ++ of doloop which are cases 2) and 3) above: in case 2) the ++ decrement immediately precedes the branch, while in case 3) ++ the compare and decrement instructions immediately precede ++ the branch. */ - for (i = 0; i < last_stage; i++) -- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg); -+ duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg); + if (prev_insn == NULL_RTX || !INSN_P (prev_insn)) + return 0; - /* Put the epilogue on the exit edge. */ - gcc_assert (single_exit (loop)); - e = single_exit (loop); - split_edge_and_insert (e, get_insns ()); -+ if (!flag_resched_modulo_sched) -+ e->dest->flags |= BB_DISABLE_SCHEDULE; -+ - end_sequence (); - } + cmp = pattern; +- inc = PATTERN (PREV_INSN (doloop_pat)); ++ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL) ++ { ++ /* The third case: the compare and decrement instructions ++ immediately precede the branch. */ ++ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0); ++ if (GET_CODE (cmp_orig) != SET) ++ return 0; ++ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE) ++ return 0; ++ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0); ++ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1); ++ if (cmp_arg2 != const0_rtx ++ || GET_CODE (cmp_arg1) != PLUS) ++ return 0; ++ reg_orig = XEXP (cmp_arg1, 0); ++ if (XEXP (cmp_arg1, 1) != GEN_INT (-1) ++ || !REG_P (reg_orig)) ++ return 0; ++ cc_reg = SET_DEST (cmp_orig); ++ ++ inc = XVECEXP (PATTERN (prev_insn), 0, 1); ++ } ++ else ++ inc = PATTERN (prev_insn); + /* We expect the condition to be of the form (reg != 0) */ + cond = XEXP (SET_SRC (cmp), 0); + if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx) + return 0; +- + } + else + { +@@ -162,11 +201,15 @@ + return 0; -+/* Mark LOOP as software pipelined so the later -+ scheduling passes don't touch it. 
*/ -+static void -+mark_loop_unsched (struct loop *loop) -+{ -+ unsigned i; -+ basic_block *bbs = get_loop_body (loop); + if ((XEXP (condition, 0) == reg) ++ /* For the third case: */ ++ || ((cc_reg != NULL_RTX) ++ && (XEXP (condition, 0) == cc_reg) ++ && (reg_orig == reg)) + || (GET_CODE (XEXP (condition, 0)) == PLUS +- && XEXP (XEXP (condition, 0), 0) == reg)) ++ && XEXP (XEXP (condition, 0), 0) == reg)) + { + if (GET_CODE (pattern) != PARALLEL) +- /* The second form we expect: ++ /* For the second form we expect: + + (set (reg) (plus (reg) (const_int -1)) + (set (pc) (if_then_else (reg != 0) +@@ -181,7 +224,24 @@ + (set (reg) (plus (reg) (const_int -1))) + (additional clobbers and uses)]) + +- So we return that form instead. ++ For the third form we expect: + -+ for (i = 0; i < loop->num_nodes; i++) -+ bbs[i]->flags |= BB_DISABLE_SCHEDULE; -+} ++ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0)) ++ (set (reg) (plus (reg) (const_int -1)))]) ++ (set (pc) (if_then_else (cc == NE) ++ (label_ref (label)) ++ (pc))) + - /* Return true if all the BBs of the loop are empty except the - loop header. */ - static bool -@@ -1009,10 +1468,10 @@ - continue; - } - -- /* Don't handle BBs with calls or barriers, or !single_set insns, -- or auto-increment insns (to avoid creating invalid reg-moves -- for the auto-increment insns). -- ??? Should handle auto-increment insns. -+ /* Don't handle BBs with calls or barriers -+ or !single_set with the exception of instructions that include -+ count_reg---these instructions are part of the control part -+ that do-loop recognizes. - ??? Should handle insns defining subregs. 
*/ - for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) - { -@@ -1021,8 +1480,8 @@ - if (CALL_P (insn) - || BARRIER_P (insn) - || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) -- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE) -- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) -+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE -+ && !reg_mentioned_p (count_reg, insn)) - || (INSN_P (insn) && (set = single_set (insn)) - && GET_CODE (SET_DEST (set)) == SUBREG)) - break; -@@ -1036,8 +1495,6 @@ - fprintf (dump_file, "SMS loop-with-call\n"); - else if (BARRIER_P (insn)) - fprintf (dump_file, "SMS loop-with-barrier\n"); -- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) -- fprintf (dump_file, "SMS reg inc\n"); - else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) - && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) - fprintf (dump_file, "SMS loop-with-not-single-set\n"); -@@ -1049,7 +1506,11 @@ - continue; - } ++ which is equivalent to the following: ++ ++ (parallel [(set (cc) (compare (reg, 1)) ++ (set (reg) (plus (reg) (const_int -1))) ++ (set (pc) (if_then_else (NE == cc) ++ (label_ref (label)) ++ (pc))))]) ++ ++ So we return the second form instead for the two cases. ++ + */ + condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx); -- if (! (g = create_ddg (bb, 0))) -+ /* Always schedule the closing branch with the rest of the -+ instructions. The branch is rotated to be in row ii-1 at the -+ end of the scheduling procedure to make sure it's the last -+ instruction in the iteration. */ -+ if (! 
(g = create_ddg (bb, 1))) - { - if (dump_file) - fprintf (dump_file, "SMS create_ddg failed\n"); -@@ -1072,9 +1533,9 @@ - { - rtx head, tail; - rtx count_reg, count_init; -- int mii, rec_mii; -- unsigned stage_count = 0; -+ int mii, rec_mii, stage_count, min_cycle; - HOST_WIDEST_INT loop_count = 0; -+ bool opt_sc_p; +--- a/src/gcc/Makefile.in ++++ b/src/gcc/Makefile.in +@@ -903,6 +903,8 @@ + READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h + PARAMS_H = params.h params.def + BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def ++INTERNAL_FN_DEF = internal-fn.def ++INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) + TREE_H = tree.h all-tree.def tree.def c-family/c-common.def \ + $(lang_tree_files) $(MACHMODE_H) tree-check.h $(BUILTINS_DEF) \ + $(INPUT_H) statistics.h $(VEC_H) treestruct.def $(HASHTAB_H) \ +@@ -912,7 +914,7 @@ + BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) cfghooks.h + GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \ + $(GGC_H) $(BASIC_BLOCK_H) $(TARGET_H) tree-ssa-operands.h \ +- tree-ssa-alias.h vecir.h ++ tree-ssa-alias.h vecir.h $(INTERNAL_FN_H) + GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h + COVERAGE_H = coverage.h $(GCOV_IO_H) + DEMANGLE_H = $(srcdir)/../include/demangle.h +@@ -1284,6 +1286,7 @@ + init-regs.o \ + input.o \ + integrate.o \ ++ internal-fn.o \ + intl.o \ + ira.o \ + ira-build.o \ +@@ -2438,7 +2441,8 @@ + tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \ + $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) langhooks.h $(FLAGS_H) \ +- $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h ++ $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h $(CFGLOOP_H) \ ++ $(TREE_DATA_REF_H) + tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(TREE_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(FLAGS_H) \ + $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TREE_DUMP_H) $(TREE_PASS_H) \ +@@ -2679,7 +2683,7 @@ + 
$(TREE_PASS_H) $(PARAMS_H) gt-tree-scalar-evolution.h + tree-data-ref.o : tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + gimple-pretty-print.h $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \ +- $(TREE_PASS_H) langhooks.h ++ $(TREE_PASS_H) langhooks.h tree-affine.h + sese.o : sese.c sese.h $(CONFIG_H) $(SYSTEM_H) coretypes.h tree-pretty-print.h \ + $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) tree-pass.h value-prof.h + graphite.o : graphite.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DIAGNOSTIC_CORE_H) \ +@@ -2766,6 +2770,8 @@ + $(TM_H) $(TREE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \ + $(TREE_PASS_H) tree-ssa-propagate.h tree-pretty-print.h \ + gimple-pretty-print.h ++internal-fn.o : internal-fn.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ ++ $(GIMPLE_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H) + gimple.o : gimple.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TREE_H) \ + $(GGC_H) $(GIMPLE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) gt-gimple.h \ + $(TREE_FLOW_H) value-prof.h $(FLAGS_H) $(DEMANGLE_H) \ +--- a/src/gcc/modulo-sched.c ++++ b/src/gcc/modulo-sched.c +@@ -116,14 +116,18 @@ - if (! (g = g_arr[loop->num])) - continue; -@@ -1151,63 +1612,103 @@ - fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", - rec_mii, mii, maxii); + /* The number of different iterations the nodes in ps span, assuming + the stage boundaries are placed efficiently. */ +-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \ +- + 1 + (ps)->ii - 1) / (ps)->ii) ++#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \ ++ + 1 + ii - 1) / ii) ++/* The stage count of ps. */ ++#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) -- /* After sms_order_nodes and before sms_schedule_by_order, to copy over -- ASAP. 
*/ -- set_node_sched_params (g); -- -- ps = sms_schedule_by_order (g, mii, maxii, node_order); -- -- if (ps){ -- stage_count = PS_STAGE_COUNT (ps); -- gcc_assert(stage_count >= 1); -- } -- -- /* Stage count of 1 means that there is no interleaving between -- iterations, let the scheduling passes do the job. */ -- if (stage_count <= 1 -- || (count_init && (loop_count <= stage_count)) -- || (flag_branch_probabilities && (trip_count <= stage_count))) -+ for (;;) - { -- if (dump_file) -+ set_node_sched_params (g); -+ -+ stage_count = 0; -+ opt_sc_p = false; -+ ps = sms_schedule_by_order (g, mii, maxii, node_order); -+ -+ if (ps) - { -- fprintf (dump_file, "SMS failed... \n"); -- fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count); -- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); -- fprintf (dump_file, ", trip-count="); -- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); -- fprintf (dump_file, ")\n"); -- } -- continue; -- } -- else -- { -- struct undo_replace_buff_elem *reg_move_replaces; -+ /* Try to achieve optimized SC by normalizing the partial -+ schedule (having the cycles start from cycle zero). -+ The branch location must be placed in row ii-1 in the -+ final scheduling. If failed, shift all instructions to -+ position the branch in row ii-1. */ -+ opt_sc_p = optimize_sc (ps, g); -+ if (opt_sc_p) -+ stage_count = calculate_stage_count (ps, 0); -+ else -+ { -+ /* Bring the branch to cycle ii-1. */ -+ int amount = (SCHED_TIME (g->closing_branch->cuid) -+ - (ps->ii - 1)); -+ -+ if (dump_file) -+ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); + /* A single instruction in the partial schedule. */ + struct ps_insn + { +- /* The corresponding DDG_NODE. */ +- ddg_node_ptr node; ++ /* Identifies the instruction to be scheduled. Values smaller than ++ the ddg's num_nodes refer directly to ddg nodes. A value of ++ X - num_nodes refers to register move X. 
*/ ++ int id; -- if (dump_file) -+ stage_count = calculate_stage_count (ps, amount); -+ } -+ -+ gcc_assert (stage_count >= 1); -+ } -+ -+ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of -+ 1 means that there is no interleaving between iterations thus -+ we let the scheduling passes do the job in this case. */ -+ if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC) -+ || (count_init && (loop_count <= stage_count)) -+ || (flag_branch_probabilities && (trip_count <= stage_count))) - { -- fprintf (dump_file, -- "SMS succeeded %d %d (with ii, sc)\n", ps->ii, -- stage_count); -- print_partial_schedule (ps, dump_file); -- fprintf (dump_file, -- "SMS Branch (%d) will later be scheduled at cycle %d.\n", -- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); -+ if (dump_file) -+ { -+ fprintf (dump_file, "SMS failed... \n"); -+ fprintf (dump_file, "SMS sched-failed (stage-count=%d," -+ " loop-count=", stage_count); -+ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); -+ fprintf (dump_file, ", trip-count="); -+ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); -+ fprintf (dump_file, ")\n"); -+ } -+ break; - } + /* The (absolute) cycle in which the PS instruction is scheduled. + Same as SCHED_TIME (node). */ +@@ -133,10 +137,35 @@ + ps_insn_ptr next_in_row, + prev_in_row; -- /* Set the stage boundaries. If the DDG is built with closing_branch_deps, -- the closing_branch was scheduled and should appear in the last (ii-1) -- row. Otherwise, we are free to schedule the branch, and we let nodes -- that were scheduled at the first PS_MIN_CYCLE cycle appear in the first -- row; this should reduce stage_count to minimum. -- TODO: Revisit the issue of scheduling the insns of the -- control part relative to the branch when the control part -- has more than one insn. */ -- normalize_sched_times (ps); -- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); -+ if (!opt_sc_p) -+ { -+ /* Rotate the partial schedule to have the branch in row ii-1. 
*/ -+ int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); -+ -+ reset_sched_times (ps, amount); -+ rotate_partial_schedule (ps, amount); -+ } -+ - set_columns_for_ps (ps); +- /* The number of nodes in the same row that come after this node. */ +- int row_rest_count; + }; -+ min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); -+ if (!schedule_reg_moves (ps)) -+ { -+ mii = ps->ii + 1; -+ free_partial_schedule (ps); -+ continue; -+ } ++/* Information about a register move that has been added to a partial ++ schedule. */ ++struct ps_reg_move_info ++{ ++ /* The source of the move is defined by the ps_insn with id DEF. ++ The destination is used by the ps_insns with the ids in USES. */ ++ int def; ++ sbitmap uses; + -+ /* Moves that handle incoming values might have been added -+ to a new first stage. Bump the stage count if so. ++ /* The original form of USES' instructions used OLD_REG, but they ++ should now use NEW_REG. */ ++ rtx old_reg; ++ rtx new_reg; + -+ ??? Perhaps we could consider rotating the schedule here -+ instead? */ -+ if (PS_MIN_CYCLE (ps) < min_cycle) -+ { -+ reset_sched_times (ps, 0); -+ stage_count++; -+ } ++ /* The number of consecutive stages that the move occupies. */ ++ int num_consecutive_stages; + -+ /* The stage count should now be correct without rotation. */ -+ gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); -+ PS_STAGE_COUNT (ps) = stage_count; ++ /* An instruction that sets NEW_REG to the correct value. The first ++ move associated with DEF will have an rhs of OLD_REG; later moves ++ use the result of the previous move. */ ++ rtx insn; ++}; + - canon_loop (loop); - -+ if (dump_file) -+ { -+ fprintf (dump_file, -+ "%s:%d SMS succeeded %d %d (with ii, sc)\n", -+ insn_file (tail), insn_line (tail), ps->ii, stage_count); -+ print_partial_schedule (ps, dump_file); -+ } -+ - /* case the BCT count is not known , Do loop-versioning */ - if (count_reg && ! 
count_init) - { -@@ -1230,23 +1731,23 @@ - permute_partial_schedule (ps, g->closing_branch->first_note); ++typedef struct ps_reg_move_info ps_reg_move_info; ++DEF_VEC_O (ps_reg_move_info); ++DEF_VEC_ALLOC_O (ps_reg_move_info, heap); ++ + /* Holds the partial schedule as an array of II rows. Each entry of the + array points to a linked list of PS_INSNs, which represents the + instructions that are scheduled for that row. */ +@@ -148,6 +177,16 @@ + /* rows[i] points to linked list of insns scheduled in row i (0<=ibb->flags |= BB_DISABLE_SCHEDULE; -+ mark_loop_unsched (loop); -+ - /* The life-info is not valid any more. */ - df_set_bb_dirty (g->bb); ++ /* All the moves added for this partial schedule. Index X has ++ a ps_insn id of X + g->num_nodes. */ ++ VEC (ps_reg_move_info, heap) *reg_moves; ++ ++ /* rows_length[i] holds the number of instructions in the row. ++ It is used only (as an optimization) to back off quickly from ++ trying to schedule a node in a full row; that is, to avoid running ++ through futile DFA state transitions. */ ++ int *rows_length; ++ + /* The earliest absolute cycle of an insn in the partial schedule. */ + int min_cycle; -- reg_move_replaces = generate_reg_moves (ps, true); -+ apply_reg_moves (ps); - if (dump_file) -- print_node_sched_params (dump_file, g->num_nodes, g); -+ print_node_sched_params (dump_file, g->num_nodes, ps); - /* Generate prolog and epilog. */ - generate_prolog_epilog (ps, loop, count_reg, count_init); -- -- free_undo_replace_buff (reg_move_replaces); -+ break; - } +@@ -155,29 +194,18 @@ + int max_cycle; - free_partial_schedule (ps); -- free (node_sched_params); -+ VEC_free (node_sched_params, heap, node_sched_param_vec); - free (node_order); - free_ddg (g); - } -@@ -1347,19 +1848,21 @@ - scheduling window is empty and zero otherwise. */ + ddg_ptr g; /* The DDG of the insns in the partial schedule. 
*/ +-}; - static int --get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i, -- sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) -+get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, -+ sbitmap sched_nodes, int ii, int *start_p, int *step_p, -+ int *end_p) - { - int start, step, end; -+ int early_start, late_start; - ddg_edge_ptr e; -- int u = nodes_order [i]; -- ddg_node_ptr u_node = &ps->g->nodes[u]; - sbitmap psp = sbitmap_alloc (ps->g->num_nodes); - sbitmap pss = sbitmap_alloc (ps->g->num_nodes); - sbitmap u_node_preds = NODE_PREDECESSORS (u_node); - sbitmap u_node_succs = NODE_SUCCESSORS (u_node); - int psp_not_empty; - int pss_not_empty; -+ int count_preds; -+ int count_succs; +-/* We use this to record all the register replacements we do in +- the kernel so we can undo SMS if it is not profitable. */ +-struct undo_replace_buff_elem +-{ +- rtx insn; +- rtx orig_reg; +- rtx new_reg; +- struct undo_replace_buff_elem *next; ++ int stage_count; /* The stage count of the partial schedule. */ + }; - /* 1. compute sched window for u (start, end, step). 
*/ - sbitmap_zero (psp); -@@ -1367,214 +1870,119 @@ - psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes); - pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes); -- if (psp_not_empty && !pss_not_empty) -- { -- int early_start = INT_MIN; -- -- end = INT_MAX; -- for (e = u_node->in; e != 0; e = e->next_in) -- { -- ddg_node_ptr v_node = e->src; -- -- if (dump_file) -- { -- fprintf (dump_file, "\nProcessing edge: "); -- print_ddg_edge (dump_file, e); -- fprintf (dump_file, -- "\nScheduling %d (%d) in psp_not_empty," -- " checking p %d (%d): ", u_node->cuid, -- INSN_UID (u_node->insn), v_node->cuid, INSN_UID -- (v_node->insn)); -- } - -- if (TEST_BIT (sched_nodes, v_node->cuid)) -- { -- int p_st = SCHED_TIME (v_node); -- -- early_start = -- MAX (early_start, p_st + e->latency - (e->distance * ii)); + static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); + static void free_partial_schedule (partial_schedule_ptr); + static void reset_partial_schedule (partial_schedule_ptr, int new_ii); + void print_partial_schedule (partial_schedule_ptr, FILE *); + static void verify_partial_schedule (partial_schedule_ptr, sbitmap); + static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, +- ddg_node_ptr node, int cycle, +- sbitmap must_precede, +- sbitmap must_follow); ++ int, int, sbitmap, sbitmap); + static void rotate_partial_schedule (partial_schedule_ptr, int); + void set_row_column_for_ps (partial_schedule_ptr); + static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); +@@ -193,34 +221,27 @@ + static void permute_partial_schedule (partial_schedule_ptr, rtx); + static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, + rtx, rtx); +-static void duplicate_insns_of_cycles (partial_schedule_ptr, +- int, int, int, rtx); - -- if (dump_file) -- fprintf (dump_file, -- "pred st = %d; early_start = %d; latency: %d", -- p_st, early_start, e->latency); -+ /* We first compute a forward 
range (start <= end), then decide whether -+ to reverse it. */ -+ early_start = INT_MIN; -+ late_start = INT_MAX; -+ start = INT_MIN; -+ end = INT_MAX; -+ step = 1; +-#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) +-#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) +-#define SCHED_FIRST_REG_MOVE(x) \ +- (((node_sched_params_ptr)(x)->aux.info)->first_reg_move) +-#define SCHED_NREG_MOVES(x) \ +- (((node_sched_params_ptr)(x)->aux.info)->nreg_moves) +-#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row) +-#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage) +-#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column) ++static int calculate_stage_count (partial_schedule_ptr, int); ++static void calculate_must_precede_follow (ddg_node_ptr, int, int, ++ int, int, sbitmap, sbitmap, sbitmap); ++static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, ++ sbitmap, int, int *, int *, int *); ++static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, ++ sbitmap, int *, sbitmap, sbitmap); ++static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); + -+ count_preds = 0; -+ count_succs = 0; ++#define NODE_ASAP(node) ((node)->aux.count) + -+ if (dump_file && (psp_not_empty || pss_not_empty)) -+ { -+ fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)" -+ "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii); -+ fprintf (dump_file, "%11s %11s %11s %11s %5s\n", -+ "start", "early start", "late start", "end", "time"); -+ fprintf (dump_file, "=========== =========== =========== ===========" -+ " =====\n"); -+ } -+ /* Calculate early_start and limit end. Both bounds are inclusive. 
*/ -+ if (psp_not_empty) -+ for (e = u_node->in; e != 0; e = e->next_in) -+ { -+ int v = e->src->cuid; - -- if (e->data_type == MEM_DEP) -- end = MIN (end, SCHED_TIME (v_node) + ii - 1); -- } -- else if (dump_file) -- fprintf (dump_file, "the node is not scheduled\n"); -- } -- start = early_start; -- end = MIN (end, early_start + ii); -- /* Schedule the node close to it's predecessors. */ -- step = 1; -+ if (TEST_BIT (sched_nodes, v)) -+ { -+ int p_st = SCHED_TIME (v); -+ int earliest = p_st + e->latency - (e->distance * ii); -+ int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX); ++#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x) ++#define SCHED_TIME(x) (SCHED_PARAMS (x)->time) ++#define SCHED_ROW(x) (SCHED_PARAMS (x)->row) ++#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) ++#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) -- if (dump_file) -- fprintf (dump_file, -- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", -- u_node->cuid, INSN_UID (u_node->insn), start, end, step); -- } -+ if (dump_file) -+ { -+ fprintf (dump_file, "%11s %11d %11s %11d %5d", -+ "", earliest, "", latest, p_st); -+ print_ddg_edge (dump_file, e); -+ fprintf (dump_file, "\n"); -+ } + /* The scheduling parameters held for each node. */ + typedef struct node_sched_params + { +- int asap; /* A lower-bound on the absolute scheduling cycle. */ +- int time; /* The absolute scheduling cycle (time >= asap). */ +- +- /* The following field (first_reg_move) is a pointer to the first +- register-move instruction added to handle the modulo-variable-expansion +- of the register defined by this node. This register-move copies the +- original register defined by the node. */ +- rtx first_reg_move; +- +- /* The number of register-move instructions added, immediately preceding +- first_reg_move. */ +- int nreg_moves; ++ int time; /* The absolute scheduling cycle. 
*/ -- else if (!psp_not_empty && pss_not_empty) -- { -- int late_start = INT_MAX; -+ early_start = MAX (early_start, earliest); -+ end = MIN (end, latest); + int row; /* Holds time % ii. */ + int stage; /* Holds time / ii. */ +@@ -230,6 +251,9 @@ + int column; + } *node_sched_params_ptr; -- end = INT_MIN; -- for (e = u_node->out; e != 0; e = e->next_out) -- { -- ddg_node_ptr v_node = e->dest; -+ if (e->type == TRUE_DEP && e->data_type == REG_DEP) -+ count_preds++; -+ } -+ } ++typedef struct node_sched_params node_sched_params; ++DEF_VEC_O (node_sched_params); ++DEF_VEC_ALLOC_O (node_sched_params, heap); + + /* The following three functions are copied from the current scheduler + code in order to use sched_analyze() for computing the dependencies. +@@ -279,6 +303,49 @@ + 0 + }; -- if (dump_file) -- { -- fprintf (dump_file, "\nProcessing edge:"); -- print_ddg_edge (dump_file, e); -- fprintf (dump_file, -- "\nScheduling %d (%d) in pss_not_empty," -- " checking s %d (%d): ", u_node->cuid, -- INSN_UID (u_node->insn), v_node->cuid, INSN_UID -- (v_node->insn)); -- } -+ /* Calculate late_start and limit start. Both bounds are inclusive. */ -+ if (pss_not_empty) -+ for (e = u_node->out; e != 0; e = e->next_out) -+ { -+ int v = e->dest->cuid; - -- if (TEST_BIT (sched_nodes, v_node->cuid)) -- { -- int s_st = SCHED_TIME (v_node); -+ if (TEST_BIT (sched_nodes, v)) -+ { -+ int s_st = SCHED_TIME (v); -+ int earliest = (e->data_type == MEM_DEP ? 
s_st - ii + 1 : INT_MIN); -+ int latest = s_st - e->latency + (e->distance * ii); - -- late_start = MIN (late_start, -- s_st - e->latency + (e->distance * ii)); -+ if (dump_file) -+ { -+ fprintf (dump_file, "%11d %11s %11d %11s %5d", -+ earliest, "", latest, "", s_st); -+ print_ddg_edge (dump_file, e); -+ fprintf (dump_file, "\n"); -+ } - -- if (dump_file) -- fprintf (dump_file, -- "succ st = %d; late_start = %d; latency = %d", -- s_st, late_start, e->latency); -- -- if (e->data_type == MEM_DEP) -- end = MAX (end, SCHED_TIME (v_node) - ii + 1); -- if (dump_file) -- fprintf (dump_file, "end = %d\n", end); -+ start = MAX (start, earliest); -+ late_start = MIN (late_start, latest); - -- } -- else if (dump_file) -- fprintf (dump_file, "the node is not scheduled\n"); -+ if (e->type == TRUE_DEP && e->data_type == REG_DEP) -+ count_succs++; -+ } -+ } - -- } -- start = late_start; -- end = MAX (end, late_start - ii); -- /* Schedule the node close to it's successors. */ -+ if (dump_file && (psp_not_empty || pss_not_empty)) -+ { -+ fprintf (dump_file, "----------- ----------- ----------- -----------" -+ " -----\n"); -+ fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n", -+ start, early_start, late_start, end, "", -+ "(max, max, min, min)"); -+ } ++/* Partial schedule instruction ID in PS is a register move. Return ++ information about it. */ ++static struct ps_reg_move_info * ++ps_reg_move (partial_schedule_ptr ps, int id) ++{ ++ gcc_checking_assert (id >= ps->g->num_nodes); ++ return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes); ++} + -+ /* Get a target scheduling window no bigger than ii. */ -+ if (early_start == INT_MIN && late_start == INT_MAX) -+ early_start = NODE_ASAP (u_node); -+ else if (early_start == INT_MIN) -+ early_start = late_start - (ii - 1); -+ late_start = MIN (late_start, early_start + (ii - 1)); ++/* Return the rtl instruction that is being scheduled by partial schedule ++ instruction ID, which belongs to schedule PS. 
*/ ++static rtx ++ps_rtl_insn (partial_schedule_ptr ps, int id) ++{ ++ if (id < ps->g->num_nodes) ++ return ps->g->nodes[id].insn; ++ else ++ return ps_reg_move (ps, id)->insn; ++} + -+ /* Apply memory dependence limits. */ -+ start = MAX (start, early_start); -+ end = MIN (end, late_start); ++/* Partial schedule instruction ID, which belongs to PS, occured in ++ the original (unscheduled) loop. Return the first instruction ++ in the loop that was associated with ps_rtl_insn (PS, ID). ++ If the instruction had some notes before it, this is the first ++ of those notes. */ ++static rtx ++ps_first_note (partial_schedule_ptr ps, int id) ++{ ++ gcc_assert (id < ps->g->num_nodes); ++ return ps->g->nodes[id].first_note; ++} + -+ if (dump_file && (psp_not_empty || pss_not_empty)) -+ fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n", -+ "", start, end, "", ""); ++/* Return the number of consecutive stages that are occupied by ++ partial schedule instruction ID in PS. */ ++static int ++ps_num_consecutive_stages (partial_schedule_ptr ps, int id) ++{ ++ if (id < ps->g->num_nodes) ++ return 1; ++ else ++ return ps_reg_move (ps, id)->num_consecutive_stages; ++} + -+ /* If there are at least as many successors as predecessors, schedule the -+ node close to its successors. 
*/ -+ if (pss_not_empty && count_succs >= count_preds) -+ { -+ int tmp = end; -+ end = start; -+ start = tmp; - step = -1; -- -- if (dump_file) -- fprintf (dump_file, -- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", -- u_node->cuid, INSN_UID (u_node->insn), start, end, step); -- - } - -- else if (psp_not_empty && pss_not_empty) -- { -- int early_start = INT_MIN; -- int late_start = INT_MAX; -- int count_preds = 0; -- int count_succs = 0; -- -- start = INT_MIN; -- end = INT_MAX; -- for (e = u_node->in; e != 0; e = e->next_in) -- { -- ddg_node_ptr v_node = e->src; -- -- if (dump_file) -- { -- fprintf (dump_file, "\nProcessing edge:"); -- print_ddg_edge (dump_file, e); -- fprintf (dump_file, -- "\nScheduling %d (%d) in psp_pss_not_empty," -- " checking p %d (%d): ", u_node->cuid, INSN_UID -- (u_node->insn), v_node->cuid, INSN_UID -- (v_node->insn)); -- } -- -- if (TEST_BIT (sched_nodes, v_node->cuid)) -- { -- int p_st = SCHED_TIME (v_node); -- -- early_start = MAX (early_start, -- p_st + e->latency -- - (e->distance * ii)); -- -- if (dump_file) -- fprintf (dump_file, -- "pred st = %d; early_start = %d; latency = %d", -- p_st, early_start, e->latency); -- -- if (e->type == TRUE_DEP && e->data_type == REG_DEP) -- count_preds++; -- -- if (e->data_type == MEM_DEP) -- end = MIN (end, SCHED_TIME (v_node) + ii - 1); -- } -- else if (dump_file) -- fprintf (dump_file, "the node is not scheduled\n"); -- -- } -- for (e = u_node->out; e != 0; e = e->next_out) -- { -- ddg_node_ptr v_node = e->dest; -- -- if (dump_file) -- { -- fprintf (dump_file, "\nProcessing edge:"); -- print_ddg_edge (dump_file, e); -- fprintf (dump_file, -- "\nScheduling %d (%d) in psp_pss_not_empty," -- " checking s %d (%d): ", u_node->cuid, INSN_UID -- (u_node->insn), v_node->cuid, INSN_UID -- (v_node->insn)); -- } -- -- if (TEST_BIT (sched_nodes, v_node->cuid)) -- { -- int s_st = SCHED_TIME (v_node); -- -- late_start = MIN (late_start, -- s_st - e->latency -- + (e->distance * ii)); -- -- if 
(dump_file) -- fprintf (dump_file, -- "succ st = %d; late_start = %d; latency = %d", -- s_st, late_start, e->latency); -- -- if (e->type == TRUE_DEP && e->data_type == REG_DEP) -- count_succs++; -- -- if (e->data_type == MEM_DEP) -- start = MAX (start, SCHED_TIME (v_node) - ii + 1); -- } -- else if (dump_file) -- fprintf (dump_file, "the node is not scheduled\n"); -- -- } -- start = MAX (start, early_start); -- end = MIN (end, MIN (early_start + ii, late_start + 1)); -- step = 1; -- /* If there are more successors than predecessors schedule the -- node close to it's successors. */ -- if (count_succs >= count_preds) -- { -- int old_start = start; -- -- start = end - 1; -- end = old_start - 1; -- step = -1; -- } -- } -- else /* psp is empty && pss is empty. */ -- { -- start = SCHED_ASAP (u_node); -- end = start + ii; -- step = 1; -- } -+ /* Now that we've finalized the window, make END an exclusive rather -+ than an inclusive bound. */ -+ end += step; - - *start_p = start; - *step_p = step; -@@ -1587,10 +1995,10 @@ - if (dump_file) - fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n", - start, end, step); -- return -1; -+ return -1; - } - -- return 0; -+ return 0; - } + /* Given HEAD and TAIL which are the first and last insns in a loop; + return the register which controls the loop. Return zero if it has + more than one occurrence in the loop besides the control part or the +@@ -310,10 +377,10 @@ + either a single (parallel) branch-on-count or a (non-parallel) + branch immediately preceded by a single (decrement) insn. */ + first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? 
tail +- : PREV_INSN (tail)); ++ : prev_nondebug_insn (tail)); - /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the -@@ -1646,7 +2054,7 @@ - SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ - for (e = u_node->in; e != 0; e = e->next_in) - if (TEST_BIT (sched_nodes, e->src->cuid) -- && ((SCHED_TIME (e->src) - (e->distance * ii)) == -+ && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == - first_cycle_in_window)) - { - if (dump_file) -@@ -1671,7 +2079,7 @@ - SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ - for (e = u_node->out; e != 0; e = e->next_out) - if (TEST_BIT (sched_nodes, e->dest->cuid) -- && ((SCHED_TIME (e->dest) + (e->distance * ii)) == -+ && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == - last_cycle_in_window)) + for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) +- if (reg_mentioned_p (reg, insn)) ++ if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn)) { - if (dump_file) -@@ -1695,7 +2103,7 @@ - last row of the scheduling window) */ - - static bool --try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, -+try_scheduling_node_in_cycle (partial_schedule_ptr ps, - int u, int cycle, sbitmap sched_nodes, - int *num_splits, sbitmap must_precede, - sbitmap must_follow) -@@ -1704,11 +2112,10 @@ - bool success = 0; - - verify_partial_schedule (ps, sched_nodes); -- psi = ps_add_node_check_conflicts (ps, u_node, cycle, -- must_precede, must_follow); -+ psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); - if (psi) - { -- SCHED_TIME (u_node) = cycle; -+ SCHED_TIME (u) = cycle; - SET_BIT (sched_nodes, u); - success = 1; - *num_splits = 0; -@@ -1760,23 +2167,17 @@ - continue; - } - -- if (JUMP_P (insn)) /* Closing branch handled later. */ -- { -- RESET_BIT (tobe_scheduled, u); -- continue; -- } -- - if (TEST_BIT (sched_nodes, u)) - continue; - - /* Try to get non-empty scheduling window. 
*/ - success = 0; -- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, -+ if (get_sched_window (ps, u_node, sched_nodes, ii, &start, - &step, &end) == 0) - { - if (dump_file) -- fprintf (dump_file, "\nTrying to schedule node %d \ -- INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID -+ fprintf (dump_file, "\nTrying to schedule node %d " -+ "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID - (g->nodes[u].insn)), start, end, step); - - gcc_assert ((step > 0 && start < end) -@@ -1788,26 +2189,13 @@ - - for (c = start; c != end; c += step) - { -- sbitmap tmp_precede = NULL; -- sbitmap tmp_follow = NULL; -- -- if (c == start) -- { -- if (step == 1) -- tmp_precede = must_precede; -- else /* step == -1. */ -- tmp_follow = must_follow; -- } -- if (c == end - step) -- { -- if (step == 1) -- tmp_follow = must_follow; -- else /* step == -1. */ -- tmp_precede = must_precede; -- } -+ sbitmap tmp_precede, tmp_follow; - -+ set_must_precede_follow (&tmp_follow, must_follow, -+ &tmp_precede, must_precede, -+ c, start, end, step); - success = -- try_scheduling_node_in_cycle (ps, u_node, u, c, -+ try_scheduling_node_in_cycle (ps, u, c, - sched_nodes, - &num_splits, tmp_precede, - tmp_follow); -@@ -1883,6 +2271,7 @@ - int ii = ps->ii; - int new_ii = ii + 1; - int row; -+ int *rows_length_new; + if (dump_file) + { +@@ -379,35 +446,59 @@ + } - verify_partial_schedule (ps, sched_nodes); -@@ -1893,18 +2282,20 @@ - if (dump_file) - fprintf (dump_file, "split_row=%d\n", split_row); +-/* Points to the array that contains the sched data for each node. */ +-static node_sched_params_ptr node_sched_params; ++/* A vector that contains the sched data for each ps_insn. */ ++static VEC (node_sched_params, heap) *node_sched_param_vec; -- normalize_sched_times (ps); -- rotate_partial_schedule (ps, ps->min_cycle); -+ reset_sched_times (ps, PS_MIN_CYCLE (ps)); -+ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); +-/* Allocate sched_params for each node and initialize it. 
Assumes that +- the aux field of each node contain the asap bound (computed earlier), +- and copies it into the sched_params field. */ ++/* Allocate sched_params for each node and initialize it. */ + static void + set_node_sched_params (ddg_ptr g) + { +- int i; ++ VEC_truncate (node_sched_params, node_sched_param_vec, 0); ++ VEC_safe_grow_cleared (node_sched_params, heap, ++ node_sched_param_vec, g->num_nodes); ++} - rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); -+ rows_length_new = (int *) xcalloc (new_ii, sizeof (int)); - for (row = 0; row < split_row; row++) - { - rows_new[row] = ps->rows[row]; -+ rows_length_new[row] = ps->rows_length[row]; - ps->rows[row] = NULL; - for (crr_insn = rows_new[row]; - crr_insn; crr_insn = crr_insn->next_in_row) - { -- ddg_node_ptr u = crr_insn->node; -+ int u = crr_insn->id; - int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); +- /* Allocate for each node in the DDG a place to hold the "sched_data". */ +- /* Initialize ASAP/ALAP/HIGHT to zero. */ +- node_sched_params = (node_sched_params_ptr) +- xcalloc (g->num_nodes, +- sizeof (struct node_sched_params)); ++/* Make sure that node_sched_param_vec has an entry for every move in PS. */ ++static void ++extend_node_sched_params (partial_schedule_ptr ps) ++{ ++ VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec, ++ ps->g->num_nodes + VEC_length (ps_reg_move_info, ++ ps->reg_moves)); ++} - SCHED_TIME (u) = new_time; -@@ -1920,11 +2311,12 @@ - for (row = split_row; row < ii; row++) +- /* Set the pointer of the general data of the node to point to the +- appropriate sched_params structure. */ +- for (i = 0; i < g->num_nodes; i++) ++/* Update the sched_params (time, row and stage) for node U using the II, ++ the CYCLE of U and MIN_CYCLE. ++ We're not simply taking the following ++ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); ++ because the stages may not be aligned on cycle 0. 
*/ ++static void ++update_node_sched_params (int u, int ii, int cycle, int min_cycle) ++{ ++ int sc_until_cycle_zero; ++ int stage; ++ ++ SCHED_TIME (u) = cycle; ++ SCHED_ROW (u) = SMODULO (cycle, ii); ++ ++ /* The calculation of stage count is done adding the number ++ of stages before cycle zero and after cycle zero. */ ++ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); ++ ++ if (SCHED_TIME (u) < 0) ++ { ++ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero - stage; ++ } ++ else { - rows_new[row + 1] = ps->rows[row]; -+ rows_length_new[row + 1] = ps->rows_length[row]; - ps->rows[row] = NULL; - for (crr_insn = rows_new[row + 1]; - crr_insn; crr_insn = crr_insn->next_in_row) - { -- ddg_node_ptr u = crr_insn->node; -+ int u = crr_insn->id; - int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; - - SCHED_TIME (u) = new_time; -@@ -1941,6 +2333,8 @@ - + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); - free (ps->rows); - ps->rows = rows_new; -+ free (ps->rows_length); -+ ps->rows_length = rows_length_new; - ps->ii = new_ii; - gcc_assert (ps->min_cycle >= 0); +- /* Watch out for aliasing problems? 
*/ +- node_sched_params[i].asap = g->nodes[i].aux.count; +- g->nodes[i].aux.info = &node_sched_params[i]; ++ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); ++ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; + } + } -@@ -1962,24 +2356,24 @@ + static void +-print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) ++print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) { - ddg_edge_ptr e; - int lower = INT_MIN, upper = INT_MAX; -- ddg_node_ptr crit_pred = NULL; -- ddg_node_ptr crit_succ = NULL; -+ int crit_pred = -1; -+ int crit_succ = -1; - int crit_cycle; - - for (e = u_node->in; e != 0; e = e->next_in) - { -- ddg_node_ptr v_node = e->src; -+ int v = e->src->cuid; - -- if (TEST_BIT (sched_nodes, v_node->cuid) -- && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii))) -- if (SCHED_TIME (v_node) > lower) -+ if (TEST_BIT (sched_nodes, v) -+ && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) -+ if (SCHED_TIME (v) > lower) - { -- crit_pred = v_node; -- lower = SCHED_TIME (v_node); -+ crit_pred = v; -+ lower = SCHED_TIME (v); - } - } + int i; -- if (crit_pred != NULL) -+ if (crit_pred >= 0) +@@ -415,22 +506,170 @@ + return; + for (i = 0; i < num_nodes; i++) { - crit_cycle = SCHED_TIME (crit_pred) + 1; - return SMODULO (crit_cycle, ii); -@@ -1987,17 +2381,18 @@ +- node_sched_params_ptr nsp = &node_sched_params[i]; +- rtx reg_move = nsp->first_reg_move; +- int j; ++ node_sched_params_ptr nsp = SCHED_PARAMS (i); - for (e = u_node->out; e != 0; e = e->next_out) - { -- ddg_node_ptr v_node = e->dest; -- if (TEST_BIT (sched_nodes, v_node->cuid) -- && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii))) -- if (SCHED_TIME (v_node) < upper) -+ int v = e->dest->cuid; -+ -+ if (TEST_BIT (sched_nodes, v) -+ && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) -+ if (SCHED_TIME (v) < upper) - { -- crit_succ = v_node; -- upper = SCHED_TIME (v_node); -+ crit_succ = v; -+ upper = SCHED_TIME (v); - } - } - -- if 
(crit_succ != NULL) -+ if (crit_succ >= 0) - { - crit_cycle = SCHED_TIME (crit_succ); - return SMODULO (crit_cycle, ii); -@@ -2016,16 +2411,23 @@ - ps_insn_ptr crr_insn; - - for (row = 0; row < ps->ii; row++) -- for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) -- { -- ddg_node_ptr u = crr_insn->node; -- -- gcc_assert (TEST_BIT (sched_nodes, u->cuid)); -- /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by -- popcount (sched_nodes) == number of insns in ps. */ -- gcc_assert (SCHED_TIME (u) >= ps->min_cycle); -- gcc_assert (SCHED_TIME (u) <= ps->max_cycle); -- } -+ { -+ int length = 0; -+ -+ for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) -+ { -+ int u = crr_insn->id; -+ -+ length++; -+ gcc_assert (TEST_BIT (sched_nodes, u)); -+ /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by -+ popcount (sched_nodes) == number of insns in ps. */ -+ gcc_assert (SCHED_TIME (u) >= ps->min_cycle); -+ gcc_assert (SCHED_TIME (u) <= ps->max_cycle); -+ } -+ -+ gcc_assert (ps->rows_length[row] == length); + fprintf (file, "Node = %d; INSN = %d\n", i, +- (INSN_UID (g->nodes[i].insn))); +- fprintf (file, " asap = %d:\n", nsp->asap); ++ INSN_UID (ps_rtl_insn (ps, i))); ++ fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i])); + fprintf (file, " time = %d:\n", nsp->time); +- fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); +- for (j = 0; j < nsp->nreg_moves; j++) ++ fprintf (file, " stage = %d:\n", nsp->stage); + } - } - - -@@ -2431,6 +2833,8 @@ - { - partial_schedule_ptr ps = XNEW (struct partial_schedule); - ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); -+ ps->rows_length = (int *) xcalloc (ii, sizeof (int)); -+ ps->reg_moves = NULL; - ps->ii = ii; - ps->history = history; - ps->min_cycle = INT_MAX; -@@ -2465,10 +2869,19 @@ - static void - free_partial_schedule (partial_schedule_ptr ps) - { ++} ++ ++/* Set SCHED_COLUMN for each instruction in row ROW of PS. 
*/ ++static void ++set_columns_for_row (partial_schedule_ptr ps, int row) ++{ ++ ps_insn_ptr cur_insn; ++ int column; ++ ++ column = 0; ++ for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) ++ SCHED_COLUMN (cur_insn->id) = column++; ++} ++ ++/* Set SCHED_COLUMN for each instruction in PS. */ ++static void ++set_columns_for_ps (partial_schedule_ptr ps) ++{ ++ int row; ++ ++ for (row = 0; row < ps->ii; row++) ++ set_columns_for_row (ps, row); ++} ++ ++/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS. ++ Its single predecessor has already been scheduled, as has its ++ ddg node successors. (The move may have also another move as its ++ successor, in which case that successor will be scheduled later.) ++ ++ The move is part of a chain that satisfies register dependencies ++ between a producing ddg node and various consuming ddg nodes. ++ If some of these dependencies have a distance of 1 (meaning that ++ the use is upward-exposoed) then DISTANCE1_USES is nonnull and ++ contains the set of uses with distance-1 dependencies. ++ DISTANCE1_USES is null otherwise. ++ ++ MUST_FOLLOW is a scratch bitmap that is big enough to hold ++ all current ps_insn ids. ++ ++ Return true on success. 
*/ ++static bool ++schedule_reg_move (partial_schedule_ptr ps, int i_reg_move, ++ sbitmap distance1_uses, sbitmap must_follow) ++{ ++ unsigned int u; ++ int this_time, this_distance, this_start, this_end, this_latency; ++ int start, end, c, ii; ++ sbitmap_iterator sbi; + ps_reg_move_info *move; -+ unsigned int i; ++ rtx this_insn; ++ ps_insn_ptr psi; + - if (!ps) - return; ++ move = ps_reg_move (ps, i_reg_move); ++ ii = ps->ii; ++ if (dump_file) ++ { ++ fprintf (dump_file, "Scheduling register move INSN %d; ii = %d" ++ ", min cycle = %d\n\n", INSN_UID (move->insn), ii, ++ PS_MIN_CYCLE (ps)); ++ print_rtl_single (dump_file, move->insn); ++ fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time"); ++ fprintf (dump_file, "=========== =========== =====\n"); ++ } + -+ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) -+ sbitmap_free (move->uses); -+ VEC_free (ps_reg_move_info, heap, ps->reg_moves); ++ start = INT_MIN; ++ end = INT_MAX; + - free_ps_insns (ps); - free (ps->rows); -+ free (ps->rows_length); - free (ps); - } - -@@ -2486,6 +2899,8 @@ - ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii - * sizeof (ps_insn_ptr)); - memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); -+ ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int)); -+ memset (ps->rows_length, 0, new_ii * sizeof (int)); - ps->ii = new_ii; - ps->min_cycle = INT_MAX; - ps->max_cycle = INT_MIN; -@@ -2505,8 +2920,13 @@ - fprintf (dump, "\n[ROW %d ]: ", i); - while (ps_i) - { -- fprintf (dump, "%d, ", -- INSN_UID (ps_i->node->insn)); -+ rtx insn = ps_rtl_insn (ps, ps_i->id); ++ /* For dependencies of distance 1 between a producer ddg node A ++ and consumer ddg node B, we have a chain of dependencies: + -+ if (JUMP_P (insn)) -+ fprintf (dump, "%d (branch), ", INSN_UID (insn)); -+ else -+ fprintf (dump, "%d, ", INSN_UID (insn)); -+ - ps_i = ps_i->next_in_row; ++ A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B ++ ++ where Mi is the ith move. 
For dependencies of distance 0 between ++ a producer ddg node A and consumer ddg node C, we have a chain of ++ dependencies: ++ ++ A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C ++ ++ where Mi' occupies the same position as Mi but occurs a stage later. ++ We can only schedule each move once, so if we have both types of ++ chain, we model the second as: ++ ++ A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C ++ ++ First handle the dependencies between the previously-scheduled ++ predecessor and the move. */ ++ this_insn = ps_rtl_insn (ps, move->def); ++ this_latency = insn_latency (this_insn, move->insn); ++ this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0; ++ this_time = SCHED_TIME (move->def) - this_distance * ii; ++ this_start = this_time + this_latency; ++ this_end = this_time + ii; ++ if (dump_file) ++ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", ++ this_start, this_end, SCHED_TIME (move->def), ++ INSN_UID (this_insn), this_latency, this_distance, ++ INSN_UID (move->insn)); ++ ++ if (start < this_start) ++ start = this_start; ++ if (end > this_end) ++ end = this_end; ++ ++ /* Handle the dependencies between the move and previously-scheduled ++ successors. 
*/ ++ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi) ++ { ++ this_insn = ps_rtl_insn (ps, u); ++ this_latency = insn_latency (move->insn, this_insn); ++ if (distance1_uses && !TEST_BIT (distance1_uses, u)) ++ this_distance = -1; ++ else ++ this_distance = 0; ++ this_time = SCHED_TIME (u) + this_distance * ii; ++ this_start = this_time - ii; ++ this_end = this_time - this_latency; ++ if (dump_file) ++ fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n", ++ this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn), ++ this_latency, this_distance, INSN_UID (this_insn)); ++ ++ if (start < this_start) ++ start = this_start; ++ if (end > this_end) ++ end = this_end; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "----------- ----------- -----\n"); ++ fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)"); ++ } ++ ++ sbitmap_zero (must_follow); ++ SET_BIT (must_follow, move->def); ++ ++ start = MAX (start, end - (ii - 1)); ++ for (c = end; c >= start; c--) ++ { ++ psi = ps_add_node_check_conflicts (ps, i_reg_move, c, ++ move->uses, must_follow); ++ if (psi) + { +- fprintf (file, " reg_move = "); +- print_rtl_single (file, reg_move); +- reg_move = PREV_INSN (reg_move); ++ update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps)); ++ if (dump_file) ++ fprintf (dump_file, "\nScheduled register move INSN %d at" ++ " time %d, row %d\n\n", INSN_UID (move->insn), c, ++ SCHED_ROW (i_reg_move)); ++ return true; } } -@@ -2514,36 +2934,31 @@ - - /* Creates an object of PS_INSN and initializes it to the given parameters. 
*/ - static ps_insn_ptr --create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) -+create_ps_insn (int id, int cycle) - { - ps_insn_ptr ps_i = XNEW (struct ps_insn); - -- ps_i->node = node; -+ ps_i->id = id; - ps_i->next_in_row = NULL; - ps_i->prev_in_row = NULL; -- ps_i->row_rest_count = rest_count; - ps_i->cycle = cycle; - - return ps_i; ++ ++ if (dump_file) ++ fprintf (dump_file, "\nNo available slot\n\n"); ++ ++ return false; } - --/* Removes the given PS_INSN from the partial schedule. Returns false if the -- node is not found in the partial schedule, else returns true. */ --static bool -+/* Removes the given PS_INSN from the partial schedule. */ -+static void - remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) + /* +@@ -444,175 +683,201 @@ + nreg_moves = ----------------------------------- + 1 - { dependence. + ii { 1 if not. + */ +-static struct undo_replace_buff_elem * +-generate_reg_moves (partial_schedule_ptr ps, bool rescan) ++static bool ++schedule_reg_moves (partial_schedule_ptr ps) { - int row; + ddg_ptr g = ps->g; + int ii = ps->ii; + int i; +- struct undo_replace_buff_elem *reg_move_replaces = NULL; -- if (!ps || !ps_i) -- return false; -- -+ gcc_assert (ps && ps_i); -+ - row = SMODULO (ps_i->cycle, ps->ii); - if (! 
ps_i->prev_in_row) + for (i = 0; i < g->num_nodes; i++) { -- if (ps_i != ps->rows[row]) -- return false; + ddg_node_ptr u = &g->nodes[i]; + ddg_edge_ptr e; + int nreg_moves = 0, i_reg_move; +- sbitmap *uses_of_defs; +- rtx last_reg_move; + rtx prev_reg, old_reg; - -+ gcc_assert (ps_i == ps->rows[row]); - ps->rows[row] = ps_i->next_in_row; - if (ps->rows[row]) - ps->rows[row]->prev_in_row = NULL; -@@ -2554,8 +2969,10 @@ - if (ps_i->next_in_row) - ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; - } -+ -+ ps->rows_length[row] -= 1; - free (ps_i); -- return true; -+ return; - } ++ int first_move; ++ int distances[2]; ++ sbitmap must_follow; ++ sbitmap distance1_uses; ++ rtx set = single_set (u->insn); ++ ++ /* Skip instructions that do not set a register. */ ++ if ((set && !REG_P (SET_DEST (set)))) ++ continue; ++ + /* Compute the number of reg_moves needed for u, by looking at life + ranges started at u (excluding self-loops). */ ++ distances[0] = distances[1] = false; + for (e = u->out; e; e = e->next_out) + if (e->type == TRUE_DEP && e->dest != e->src) + { +- int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; ++ int nreg_moves4e = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid)) / ii; - /* Unlike what literature describes for modulo scheduling (which focuses -@@ -2571,6 +2988,7 @@ - ps_insn_ptr next_ps_i; - ps_insn_ptr first_must_follow = NULL; - ps_insn_ptr last_must_precede = NULL; -+ ps_insn_ptr last_in_row = NULL; - int row; + if (e->distance == 1) +- nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; ++ nreg_moves4e = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid) + ii) / ii; - if (! ps_i) -@@ -2585,10 +3003,11 @@ - next_ps_i; - next_ps_i = next_ps_i->next_in_row) - { -- if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid) -+ if (must_follow -+ && TEST_BIT (must_follow, next_ps_i->id) - && ! 
first_must_follow) - first_must_follow = next_ps_i; -- if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid)) -+ if (must_precede && TEST_BIT (must_precede, next_ps_i->id)) - { - /* If we have already met a node that must follow, then - there is no possible column. */ -@@ -2597,8 +3016,37 @@ - else - last_must_precede = next_ps_i; - } -+ /* The closing branch must be the last in the row. */ -+ if (must_precede -+ && TEST_BIT (must_precede, next_ps_i->id) -+ && JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) -+ return false; -+ -+ last_in_row = next_ps_i; - } + /* If dest precedes src in the schedule of the kernel, then dest + will read before src writes and we can save one reg_copy. */ +- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) +- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) ++ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) ++ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) + nreg_moves4e--; -+ /* The closing branch is scheduled as well. Make sure there is no -+ dependent instruction after it as the branch should be the last -+ instruction in the row. */ -+ if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) -+ { -+ if (first_must_follow) -+ return false; -+ if (last_in_row) ++ if (nreg_moves4e >= 1) ++ { ++ /* !single_set instructions are not supported yet and ++ thus we do not except to encounter them in the loop ++ except from the doloop part. For the latter case ++ we assume no regmoves are generated as the doloop ++ instructions are tied to the branch with an edge. */ ++ gcc_assert (set); ++ /* If the instruction contains auto-inc register then ++ validate that the regmov is being generated for the ++ target regsiter rather then the inc'ed register. 
*/ ++ gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); ++ } ++ ++ if (nreg_moves4e) ++ { ++ gcc_assert (e->distance < 2); ++ distances[e->distance] = true; ++ } + nreg_moves = MAX (nreg_moves, nreg_moves4e); + } + + if (nreg_moves == 0) + continue; + ++ /* Create NREG_MOVES register moves. */ ++ first_move = VEC_length (ps_reg_move_info, ps->reg_moves); ++ VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves, ++ first_move + nreg_moves); ++ extend_node_sched_params (ps); ++ ++ /* Record the moves associated with this node. */ ++ first_move += ps->g->num_nodes; ++ ++ /* Generate each move. */ ++ old_reg = prev_reg = SET_DEST (single_set (u->insn)); ++ for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) + { -+ /* Make the branch the last in the row. New instructions -+ will be inserted at the beginning of the row or after the -+ last must_precede instruction thus the branch is guaranteed -+ to remain the last instruction in the row. */ -+ last_in_row->next_in_row = ps_i; -+ ps_i->prev_in_row = last_in_row; -+ ps_i->next_in_row = NULL; ++ ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move); ++ ++ move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; ++ move->uses = sbitmap_alloc (first_move + nreg_moves); ++ move->old_reg = old_reg; ++ move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)); ++ move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; ++ move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); ++ sbitmap_zero (move->uses); ++ ++ prev_reg = move->new_reg; + } -+ else -+ ps->rows[row] = ps_i; -+ return true; -+ } -+ - /* Now insert the node after INSERT_AFTER_PSI. */ ++ ++ distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL; ++ + /* Every use of the register defined by node may require a different + copy of this register, depending on the time the use is scheduled. +- Set a bitmap vector, telling which nodes use each copy of this +- register. 
*/ +- uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes); +- sbitmap_vector_zero (uses_of_defs, nreg_moves); ++ Record which uses require which move results. */ + for (e = u->out; e; e = e->next_out) + if (e->type == TRUE_DEP && e->dest != e->src) + { +- int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; ++ int dest_copy = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid)) / ii; - if (! last_must_precede) -@@ -2631,7 +3079,6 @@ - { - ps_insn_ptr prev, next; - int row; -- ddg_node_ptr next_node; + if (e->distance == 1) +- dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; ++ dest_copy = (SCHED_TIME (e->dest->cuid) ++ - SCHED_TIME (e->src->cuid) + ii) / ii; - if (!ps || !ps_i) - return false; -@@ -2641,11 +3088,9 @@ - if (! ps_i->next_in_row) - return false; +- if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) +- && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) ++ if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) ++ && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) + dest_copy--; -- next_node = ps_i->next_in_row->node; -- - /* Check if next_in_row is dependent on ps_i, both having same sched - times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ -- if (must_follow && TEST_BIT (must_follow, next_node->cuid)) -+ if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id)) - return false; + if (dest_copy) +- SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid); +- } ++ { ++ ps_reg_move_info *move; - /* Advance PS_I over its next_in_row in the doubly linked list. */ -@@ -2676,21 +3121,16 @@ - before/after (respectively) the node pointed to by PS_I when scheduled - in the same cycle. 
*/ - static ps_insn_ptr --add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, -+add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, - sbitmap must_precede, sbitmap must_follow) - { - ps_insn_ptr ps_i; -- int rest_count = 1; - int row = SMODULO (cycle, ps->ii); +- /* Now generate the reg_moves, attaching relevant uses to them. */ +- SCHED_NREG_MOVES (u) = nreg_moves; +- old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn))); +- /* Insert the reg-moves right before the notes which precede +- the insn they relates to. */ +- last_reg_move = u->first_note; ++ move = ps_reg_move (ps, first_move + dest_copy - 1); ++ SET_BIT (move->uses, e->dest->cuid); ++ if (e->distance == 1) ++ SET_BIT (distance1_uses, e->dest->cuid); ++ } ++ } -- if (ps->rows[row] -- && ps->rows[row]->row_rest_count >= issue_rate) -+ if (ps->rows_length[row] >= issue_rate) - return NULL; - -- if (ps->rows[row]) -- rest_count += ps->rows[row]->row_rest_count; ++ must_follow = sbitmap_alloc (first_move + nreg_moves); + for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) +- { +- unsigned int i_use = 0; +- rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg)); +- rtx reg_move = gen_move_insn (new_reg, prev_reg); +- sbitmap_iterator sbi; - -- ps_i = create_ps_insn (node, rest_count, cycle); -+ ps_i = create_ps_insn (id, cycle); - - /* Finds and inserts PS_I according to MUST_FOLLOW and - MUST_PRECEDE. */ -@@ -2700,6 +3140,7 @@ - return NULL; +- add_insn_before (reg_move, last_reg_move, NULL); +- last_reg_move = reg_move; +- +- if (!SCHED_FIRST_REG_MOVE (u)) +- SCHED_FIRST_REG_MOVE (u) = reg_move; +- +- EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi) +- { +- struct undo_replace_buff_elem *rep; +- +- rep = (struct undo_replace_buff_elem *) +- xcalloc (1, sizeof (struct undo_replace_buff_elem)); +- rep->insn = g->nodes[i_use].insn; +- rep->orig_reg = old_reg; +- rep->new_reg = new_reg; +- +- if (! 
reg_move_replaces) +- reg_move_replaces = rep; +- else +- { +- rep->next = reg_move_replaces; +- reg_move_replaces = rep; +- } +- +- replace_rtx (g->nodes[i_use].insn, old_reg, new_reg); +- if (rescan) +- df_insn_rescan (g->nodes[i_use].insn); +- } +- +- prev_reg = new_reg; +- } +- sbitmap_vector_free (uses_of_defs); ++ if (!schedule_reg_move (ps, first_move + i_reg_move, ++ distance1_uses, must_follow)) ++ break; ++ sbitmap_free (must_follow); ++ if (distance1_uses) ++ sbitmap_free (distance1_uses); ++ if (i_reg_move < nreg_moves) ++ return false; } - -+ ps->rows_length[row] += 1; - return ps_i; +- return reg_move_replaces; ++ return true; } -@@ -2741,7 +3182,7 @@ - crr_insn; - crr_insn = crr_insn->next_in_row) - { -- rtx insn = crr_insn->node->insn; -+ rtx insn = ps_rtl_insn (ps, crr_insn->id); - - if (!NONDEBUG_INSN_P (insn)) - continue; -@@ -2778,7 +3219,7 @@ - cuid N must be come before/after (respectively) the node pointed to by - PS_I when scheduled in the same cycle. */ - ps_insn_ptr --ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, -+ps_add_node_check_conflicts (partial_schedule_ptr ps, int n, - int c, sbitmap must_precede, - sbitmap must_follow) +-/* Free memory allocated for the undo buffer. */ ++/* Emit the moves associatied with PS. Apply the substitutions ++ associated with them. */ + static void +-free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) ++apply_reg_moves (partial_schedule_ptr ps) { -@@ -2820,6 +3261,22 @@ - return ps_i; - } ++ ps_reg_move_info *move; ++ int i; -+/* Calculate the stage count of the partial schedule PS. The calculation -+ takes into account the rotation amount passed in ROTATION_AMOUNT. 
*/ -+int -+calculate_stage_count (partial_schedule_ptr ps, int rotation_amount) -+{ -+ int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; -+ int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; -+ int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); -+ -+ /* The calculation of stage count is done adding the number of stages -+ before cycle zero and after cycle zero. */ -+ stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii); -+ -+ return stage_count; -+} -+ - /* Rotate the rows of PS such that insns scheduled at time - START_CYCLE will appear in row 0. Updates max/min_cycles. */ - void -@@ -2837,11 +3294,16 @@ - for (i = 0; i < backward_rotates; i++) +- while (reg_move_replaces) ++ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) { - ps_insn_ptr first_row = ps->rows[0]; -+ int first_row_length = ps->rows_length[0]; +- struct undo_replace_buff_elem *rep = reg_move_replaces; ++ unsigned int i_use; ++ sbitmap_iterator sbi; - for (row = 0; row < last_row; row++) -- ps->rows[row] = ps->rows[row+1]; +- reg_move_replaces = reg_move_replaces->next; +- free (rep); ++ EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi) + { -+ ps->rows[row] = ps->rows[row + 1]; -+ ps->rows_length[row] = ps->rows_length[row + 1]; ++ replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); ++ df_insn_rescan (ps->g->nodes[i_use].insn); + } - - ps->rows[last_row] = first_row; -+ ps->rows_length[last_row] = first_row_length; } + } - ps->max_cycle -= start_cycle; ---- a/src/gcc/objc/ChangeLog -+++ b/src/gcc/objc/ChangeLog -@@ -1,3 +1,18 @@ -+2011-11-12 Iain Sandoe +-/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values +- of SCHED_ROW and SCHED_STAGE. */ ++/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of ++ SCHED_ROW and SCHED_STAGE. 
*/ + static void +-normalize_sched_times (partial_schedule_ptr ps) ++reset_sched_times (partial_schedule_ptr ps, int amount) + { + int row; +- int amount = PS_MIN_CYCLE (ps); + int ii = ps->ii; + ps_insn_ptr crr_insn; + + for (row = 0; row < ii; row++) + for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int normalized_time = SCHED_TIME (u) - amount; ++ int new_min_cycle = PS_MIN_CYCLE (ps) - amount; + +- if (dump_file) +- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\ +- min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME +- (u), ps->min_cycle); ++ if (dump_file) ++ { ++ /* Print the scheduling times after the rotation. */ ++ rtx insn = ps_rtl_insn (ps, u); + -+ Backport from mainline -+ 2011-10-29 Iain Sandoe ++ fprintf (dump_file, "crr_insn->node=%d (insn id %d), " ++ "crr_insn->cycle=%d, min_cycle=%d", u, ++ INSN_UID (insn), normalized_time, new_min_cycle); ++ if (JUMP_P (insn)) ++ fprintf (dump_file, " (branch)"); ++ fprintf (dump_file, "\n"); ++ } + -+ PR target/47997 -+ * objc-act.c (objc_build_string_object): Remove redundant second -+ call to fix_string_type (). Add a checking assert that we are, -+ indeed, passed a STRING_CST. -+ -+2011-11-12 Iain Sandoe -+ -+ * objc-next-runtime-abi-01.c (objc_eh_personality): Use gcc personality -+ for Objective-C m32. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/gcc/objc/objc-act.c -+++ b/src/gcc/objc/objc-act.c -@@ -3136,9 +3136,8 @@ - struct string_descriptor *desc, key; - void **loc; - -- /* Prep the string argument. */ -- string = fix_string_type (string); -- TREE_SET_CODE (string, STRING_CST); -+ /* We should be passed a STRING_CST. 
*/ -+ gcc_checking_assert (TREE_CODE (string) == STRING_CST); - length = TREE_STRING_LENGTH (string) - 1; - - /* The target may have different ideas on how to construct an ObjC string ---- a/src/gcc/objc/objc-next-runtime-abi-01.c -+++ b/src/gcc/objc/objc-next-runtime-abi-01.c -@@ -2871,12 +2871,15 @@ - return eh_id; - } - -+/* For NeXT ABI 0 and 1, the personality routines are just those of the -+ underlying language. */ -+ - static tree - objc_eh_personality (void) - { - if (!objc_eh_personality_decl) - #ifndef OBJCPLUS -- objc_eh_personality_decl = build_personality_function ("objc"); -+ objc_eh_personality_decl = build_personality_function ("gcc"); - #else - objc_eh_personality_decl = build_personality_function ("gxx"); - #endif ---- a/src/gcc/optabs.c -+++ b/src/gcc/optabs.c -@@ -225,6 +225,61 @@ - return 1; + gcc_assert (SCHED_TIME (u) >= ps->min_cycle); + gcc_assert (SCHED_TIME (u) <= ps->max_cycle); +- SCHED_TIME (u) = normalized_time; +- SCHED_ROW (u) = normalized_time % ii; +- SCHED_STAGE (u) = normalized_time / ii; +- } +-} +- +-/* Set SCHED_COLUMN of each node according to its position in PS. */ +-static void +-set_columns_for_ps (partial_schedule_ptr ps) +-{ +- int row; +- +- for (row = 0; row < ps->ii; row++) +- { +- ps_insn_ptr cur_insn = ps->rows[row]; +- int column = 0; + +- for (; cur_insn; cur_insn = cur_insn->next_in_row) +- SCHED_COLUMN (cur_insn->node) = column++; +- } ++ crr_insn->cycle = normalized_time; ++ update_node_sched_params (u, ii, normalized_time, new_min_cycle); ++ } } - -+/* Given two input operands, OP0 and OP1, determine what the correct from_mode -+ for a widening operation would be. In most cases this would be OP0, but if -+ that's a constant it'll be VOIDmode, which isn't useful. */ +- ++ + /* Permute the insns according to their order in PS, from row 0 to + row ii-1, and position them right before LAST. This schedules + the insns of the loop kernel. 
*/ +@@ -625,14 +890,220 @@ + + for (row = 0; row < ii ; row++) + for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) +- if (PREV_INSN (last) != ps_ij->node->insn) +- reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, +- PREV_INSN (last)); ++ { ++ rtx insn = ps_rtl_insn (ps, ps_ij->id); + -+static enum machine_mode -+widened_mode (enum machine_mode to_mode, rtx op0, rtx op1) -+{ -+ enum machine_mode m0 = GET_MODE (op0); -+ enum machine_mode m1 = GET_MODE (op1); -+ enum machine_mode result; ++ if (PREV_INSN (last) != insn) ++ { ++ if (ps_ij->id < ps->g->num_nodes) ++ reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn, ++ PREV_INSN (last)); ++ else ++ add_insn_before (insn, last, NULL); ++ } ++ } ++} + -+ if (m0 == VOIDmode && m1 == VOIDmode) -+ return to_mode; -+ else if (m0 == VOIDmode || GET_MODE_SIZE (m0) < GET_MODE_SIZE (m1)) -+ result = m1; -+ else -+ result = m0; ++/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE ++ respectively only if cycle C falls on the border of the scheduling ++ window boundaries marked by START and END cycles. STEP is the ++ direction of the window. */ ++static inline void ++set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow, ++ sbitmap *tmp_precede, sbitmap must_precede, int c, ++ int start, int end, int step) ++{ ++ *tmp_precede = NULL; ++ *tmp_follow = NULL; + -+ if (GET_MODE_SIZE (result) > GET_MODE_SIZE (to_mode)) -+ return to_mode; ++ if (c == start) ++ { ++ if (step == 1) ++ *tmp_precede = must_precede; ++ else /* step == -1. */ ++ *tmp_follow = must_follow; ++ } ++ if (c == end - step) ++ { ++ if (step == 1) ++ *tmp_follow = must_follow; ++ else /* step == -1. */ ++ *tmp_precede = must_precede; ++ } + -+ return result; +} -+ -+/* Find a widening optab even if it doesn't widen as much as we want. -+ E.g. if from_mode is HImode, and to_mode is DImode, and there is no -+ direct HI->SI insn, then return SI->DI, if that exists. 
-+ If PERMIT_NON_WIDENING is non-zero then this can be used with -+ non-widening optabs also. */ + -+enum insn_code -+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode, -+ enum machine_mode from_mode, -+ int permit_non_widening, -+ enum machine_mode *found_mode) ++/* Return True if the branch can be moved to row ii-1 while ++ normalizing the partial schedule PS to start from cycle zero and thus ++ optimize the SC. Otherwise return False. */ ++static bool ++optimize_sc (partial_schedule_ptr ps, ddg_ptr g) +{ -+ for (; (permit_non_widening || from_mode != to_mode) -+ && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode) -+ && from_mode != VOIDmode; -+ from_mode = GET_MODE_WIDER_MODE (from_mode)) ++ int amount = PS_MIN_CYCLE (ps); ++ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes); ++ int start, end, step; ++ int ii = ps->ii; ++ bool ok = false; ++ int stage_count, stage_count_curr; ++ ++ /* Compare the SC after normalization and SC after bringing the branch ++ to row ii-1. If they are equal just bail out. */ ++ stage_count = calculate_stage_count (ps, amount); ++ stage_count_curr = ++ calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1)); ++ ++ if (stage_count == stage_count_curr) + { -+ enum insn_code handler = widening_optab_handler (op, to_mode, -+ from_mode); ++ if (dump_file) ++ fprintf (dump_file, "SMS SC already optimized.\n"); + -+ if (handler != CODE_FOR_nothing) ++ ok = false; ++ goto clear; ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "SMS Trying to optimize branch location\n"); ++ fprintf (dump_file, "SMS partial schedule before trial:\n"); ++ print_partial_schedule (ps, dump_file); ++ } ++ ++ /* First, normalize the partial scheduling. 
*/ ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ "SMS partial schedule after normalization (ii, %d, SC %d):\n", ++ ii, stage_count); ++ print_partial_schedule (ps, dump_file); ++ } ++ ++ if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1) ++ { ++ ok = true; ++ goto clear; ++ } ++ ++ sbitmap_ones (sched_nodes); ++ ++ /* Calculate the new placement of the branch. It should be in row ++ ii-1 and fall into it's scheduling window. */ ++ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start, ++ &step, &end) == 0) ++ { ++ bool success; ++ ps_insn_ptr next_ps_i; ++ int branch_cycle = SCHED_TIME (g->closing_branch->cuid); ++ int row = SMODULO (branch_cycle, ps->ii); ++ int num_splits = 0; ++ sbitmap must_precede, must_follow, tmp_precede, tmp_follow; ++ int c; ++ ++ if (dump_file) ++ fprintf (dump_file, "\nTrying to schedule node %d " ++ "INSN = %d in (%d .. %d) step %d\n", ++ g->closing_branch->cuid, ++ (INSN_UID (g->closing_branch->insn)), start, end, step); ++ ++ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end)); ++ if (step == 1) + { -+ if (found_mode) -+ *found_mode = from_mode; -+ return handler; ++ c = start + ii - SMODULO (start, ii) - 1; ++ gcc_assert (c >= start); ++ if (c >= end) ++ { ++ ok = false; ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d\n", c); ++ goto clear; ++ } + } -+ } ++ else ++ { ++ c = start - SMODULO (start, ii) - 1; ++ gcc_assert (c <= start); + -+ return CODE_FOR_nothing; -+} -+ - /* Widen OP to MODE and return the rtx for the widened operand. UNSIGNEDP - says whether OP is signed or unsigned. NO_EXTEND is nonzero if we need - not actually do a sign-extend or zero-extend, but can leave the -@@ -399,6 +454,14 @@ - return TYPE_UNSIGNED (type) ? - vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; - -+ case VEC_WIDEN_LSHIFT_HI_EXPR: -+ return TYPE_UNSIGNED (type) ? 
-+ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab; ++ if (c <= end) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d\n", c); ++ ok = false; ++ goto clear; ++ } ++ } + -+ case VEC_WIDEN_LSHIFT_LO_EXPR: -+ return TYPE_UNSIGNED (type) ? -+ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab; ++ must_precede = sbitmap_alloc (g->num_nodes); ++ must_follow = sbitmap_alloc (g->num_nodes); + - case VEC_UNPACK_HI_EXPR: - return TYPE_UNSIGNED (type) ? - vec_unpacku_hi_optab : vec_unpacks_hi_optab; -@@ -517,8 +580,9 @@ - optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); - if (ops->code == WIDEN_MULT_PLUS_EXPR - || ops->code == WIDEN_MULT_MINUS_EXPR) -- icode = (int) optab_handler (widen_pattern_optab, -- TYPE_MODE (TREE_TYPE (ops->op2))); -+ icode = (int) find_widening_optab_handler (widen_pattern_optab, -+ TYPE_MODE (TREE_TYPE (ops->op2)), -+ tmode0, 0); - else - icode = (int) optab_handler (widen_pattern_optab, tmode0); - gcc_assert (icode != CODE_FOR_nothing); -@@ -1389,7 +1453,9 @@ - rtx target, int unsignedp, enum optab_methods methods, - rtx last) ++ /* Try to schedule the branch is it's new cycle. */ ++ calculate_must_precede_follow (g->closing_branch, start, end, ++ step, ii, sched_nodes, ++ must_precede, must_follow); ++ ++ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, ++ must_precede, c, start, end, step); ++ ++ /* Find the element in the partial schedule related to the closing ++ branch so we can remove it from it's current cycle. 
*/ ++ for (next_ps_i = ps->rows[row]; ++ next_ps_i; next_ps_i = next_ps_i->next_in_row) ++ if (next_ps_i->id == g->closing_branch->cuid) ++ break; ++ ++ remove_node_from_ps (ps, next_ps_i); ++ success = ++ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, ++ sched_nodes, &num_splits, ++ tmp_precede, tmp_follow); ++ gcc_assert (num_splits == 0); ++ if (!success) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS failed to schedule branch at cycle: %d, " ++ "bringing it back to cycle %d\n", c, branch_cycle); ++ ++ /* The branch was failed to be placed in row ii - 1. ++ Put it back in it's original place in the partial ++ schedualing. */ ++ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede, ++ must_precede, branch_cycle, start, end, ++ step); ++ success = ++ try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, ++ branch_cycle, sched_nodes, ++ &num_splits, tmp_precede, ++ tmp_follow); ++ gcc_assert (success && (num_splits == 0)); ++ ok = false; ++ } ++ else ++ { ++ /* The branch is placed in row ii - 1. */ ++ if (dump_file) ++ fprintf (dump_file, ++ "SMS success in moving branch to cycle %d\n", c); ++ ++ update_node_sched_params (g->closing_branch->cuid, ii, c, ++ PS_MIN_CYCLE (ps)); ++ ok = true; ++ } ++ ++ free (must_precede); ++ free (must_follow); ++ } ++ ++clear: ++ free (sched_nodes); ++ return ok; + } + + static void + duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, +- int to_stage, int for_prolog, rtx count_reg) ++ int to_stage, rtx count_reg) { -- int icode = (int) optab_handler (binoptab, mode); -+ enum machine_mode from_mode = widened_mode (mode, op0, op1); -+ int icode = (int) find_widening_optab_handler (binoptab, mode, -+ from_mode, 1); - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - enum machine_mode mode1 = insn_data[icode].operand[2].mode; - enum machine_mode tmp_mode; -@@ -1546,7 +1612,9 @@ - /* If we can do it with a three-operand insn, do so. 
*/ + int row; + ps_insn_ptr ps_ij; +@@ -640,59 +1111,30 @@ + for (row = 0; row < ps->ii; row++) + for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) + { +- ddg_node_ptr u_node = ps_ij->node; +- int j, i_reg_moves; +- rtx reg_move = NULL_RTX; ++ int u = ps_ij->id; ++ int first_u, last_u; ++ rtx u_insn; - if (methods != OPTAB_MUST_WIDEN -- && optab_handler (binoptab, mode) != CODE_FOR_nothing) -+ && find_widening_optab_handler (binoptab, mode, -+ widened_mode (mode, op0, op1), 1) -+ != CODE_FOR_nothing) - { - temp = expand_binop_directly (mode, binoptab, op0, op1, target, - unsignedp, methods, last); -@@ -1586,8 +1654,9 @@ + /* Do not duplicate any insn which refers to count_reg as it + belongs to the control part. ++ The closing branch is scheduled as well and thus should ++ be ignored. + TODO: This should be done by analyzing the control part of + the loop. */ +- if (reg_mentioned_p (count_reg, u_node->insn)) ++ u_insn = ps_rtl_insn (ps, u); ++ if (reg_mentioned_p (count_reg, u_insn) ++ || JUMP_P (u_insn)) + continue; - if (binoptab == smul_optab - && GET_MODE_WIDER_MODE (mode) != VOIDmode -- && (optab_handler ((unsignedp ? umul_widen_optab : smul_widen_optab), -- GET_MODE_WIDER_MODE (mode)) -+ && (widening_optab_handler ((unsignedp ? umul_widen_optab -+ : smul_widen_optab), -+ GET_MODE_WIDER_MODE (mode), mode) - != CODE_FOR_nothing)) - { - temp = expand_binop (GET_MODE_WIDER_MODE (mode), -@@ -1618,9 +1687,11 @@ - if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing - || (binoptab == smul_optab - && GET_MODE_WIDER_MODE (wider_mode) != VOIDmode -- && (optab_handler ((unsignedp ? umul_widen_optab -- : smul_widen_optab), -- GET_MODE_WIDER_MODE (wider_mode)) -+ && (find_widening_optab_handler ((unsignedp -+ ? umul_widen_optab -+ : smul_widen_optab), -+ GET_MODE_WIDER_MODE (wider_mode), -+ mode, 0) - != CODE_FOR_nothing))) +- if (for_prolog) +- { +- /* SCHED_STAGE (u_node) >= from_stage == 0. 
Generate increasing +- number of reg_moves starting with the second occurrence of +- u_node, which is generated if its SCHED_STAGE <= to_stage. */ +- i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1; +- i_reg_moves = MAX (i_reg_moves, 0); +- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); +- +- /* The reg_moves start from the *first* reg_move backwards. */ +- if (i_reg_moves) +- { +- reg_move = SCHED_FIRST_REG_MOVE (u_node); +- for (j = 1; j < i_reg_moves; j++) +- reg_move = PREV_INSN (reg_move); +- } +- } +- else /* It's for the epilog. */ ++ first_u = SCHED_STAGE (u); ++ last_u = first_u + ps_num_consecutive_stages (ps, u) - 1; ++ if (from_stage <= last_u && to_stage >= first_u) { - rtx xop0 = op0, xop1 = op1; -@@ -2043,8 +2114,8 @@ - && optab_handler (add_optab, word_mode) != CODE_FOR_nothing) - { - rtx product = NULL_RTX; +- /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves, +- starting to decrease one stage after u_node no longer occurs; +- that is, generate all reg_moves until +- SCHED_STAGE (u_node) == from_stage - 1. */ +- i_reg_moves = SCHED_NREG_MOVES (u_node) +- - (from_stage - SCHED_STAGE (u_node) - 1); +- i_reg_moves = MAX (i_reg_moves, 0); +- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); - -- if (optab_handler (umul_widen_optab, mode) != CODE_FOR_nothing) -+ if (widening_optab_handler (umul_widen_optab, mode, word_mode) -+ != CODE_FOR_nothing) - { - product = expand_doubleword_mult (mode, op0, op1, target, - true, methods); -@@ -2053,7 +2124,8 @@ - } +- /* The reg_moves start from the *last* reg_move forwards. 
*/ +- if (i_reg_moves) +- { +- reg_move = SCHED_FIRST_REG_MOVE (u_node); +- for (j = 1; j < SCHED_NREG_MOVES (u_node); j++) +- reg_move = PREV_INSN (reg_move); +- } ++ if (u < ps->g->num_nodes) ++ duplicate_insn_chain (ps_first_note (ps, u), u_insn); ++ else ++ emit_insn (copy_rtx (PATTERN (u_insn))); + } +- +- for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move)) +- emit_insn (copy_rtx (PATTERN (reg_move))); +- if (SCHED_STAGE (u_node) >= from_stage +- && SCHED_STAGE (u_node) <= to_stage) +- duplicate_insn_chain (u_node->first_note, u_node->insn); + } + } - if (product == NULL_RTX -- && optab_handler (smul_widen_optab, mode) != CODE_FOR_nothing) -+ && widening_optab_handler (smul_widen_optab, mode, word_mode) -+ != CODE_FOR_nothing) - { - product = expand_doubleword_mult (mode, op0, op1, target, - false, methods); -@@ -2144,7 +2216,8 @@ - wider_mode != VOIDmode; - wider_mode = GET_MODE_WIDER_MODE (wider_mode)) - { -- if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing -+ if (find_widening_optab_handler (binoptab, wider_mode, mode, 1) -+ != CODE_FOR_nothing - || (methods == OPTAB_LIB - && optab_libfunc (binoptab, wider_mode))) - { -@@ -6171,6 +6244,9 @@ - init_optab (usashl_optab, US_ASHIFT); - init_optab (ashr_optab, ASHIFTRT); - init_optab (lshr_optab, LSHIFTRT); -+ init_optabv (vashl_optab, ASHIFT); -+ init_optabv (vashr_optab, ASHIFTRT); -+ init_optabv (vlshr_optab, LSHIFTRT); - init_optab (rotl_optab, ROTATE); - init_optab (rotr_optab, ROTATERT); - init_optab (smin_optab, SMIN); -@@ -6283,6 +6359,10 @@ - init_optab (vec_widen_umult_lo_optab, UNKNOWN); - init_optab (vec_widen_smult_hi_optab, UNKNOWN); - init_optab (vec_widen_smult_lo_optab, UNKNOWN); -+ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN); -+ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN); -+ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN); -+ init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN); - init_optab (vec_unpacks_hi_optab, UNKNOWN); - init_optab (vec_unpacks_lo_optab, 
UNKNOWN); - init_optab (vec_unpacku_hi_optab, UNKNOWN); ---- a/src/gcc/optabs.h -+++ b/src/gcc/optabs.h -@@ -42,6 +42,11 @@ - int insn_code; - }; +@@ -726,11 +1168,13 @@ + } -+struct widening_optab_handlers -+{ -+ struct optab_handlers handlers[NUM_MACHINE_MODES][NUM_MACHINE_MODES]; -+}; -+ - struct optab_d - { - enum rtx_code code; -@@ -50,6 +55,7 @@ - void (*libcall_gen)(struct optab_d *, const char *name, char suffix, - enum machine_mode); - struct optab_handlers handlers[NUM_MACHINE_MODES]; -+ struct widening_optab_handlers *widening; - }; - typedef struct optab_d * optab; + for (i = 0; i < last_stage; i++) +- duplicate_insns_of_cycles (ps, 0, i, 1, count_reg); ++ duplicate_insns_of_cycles (ps, 0, i, count_reg); -@@ -344,6 +350,12 @@ - OTI_vec_widen_umult_lo, - OTI_vec_widen_smult_hi, - OTI_vec_widen_smult_lo, -+ /* Widening shift left. -+ The high/low part of the resulting vector is returned. */ -+ OTI_vec_widen_ushiftl_hi, -+ OTI_vec_widen_ushiftl_lo, -+ OTI_vec_widen_sshiftl_hi, -+ OTI_vec_widen_sshiftl_lo, - /* Extract and widen the high/low part of a vector of signed or - floating point elements. */ - OTI_vec_unpacks_hi, -@@ -536,6 +548,10 @@ - #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo]) - #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi]) - #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo]) -+#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi]) -+#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo]) -+#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi]) -+#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo]) - #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi]) - #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo]) - #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi]) -@@ -578,6 +594,9 @@ - COI_satfract, - COI_satfractuns, + /* Put the prolog on the entry edge. 
*/ + e = loop_preheader_edge (loop); + split_edge_and_insert (e, get_insns ()); ++ if (!flag_resched_modulo_sched) ++ e->dest->flags |= BB_DISABLE_SCHEDULE; -+ COI_vec_load_lanes, -+ COI_vec_store_lanes, -+ - COI_MAX - }; + end_sequence (); -@@ -598,6 +617,8 @@ - #define fractuns_optab (&convert_optab_table[COI_fractuns]) - #define satfract_optab (&convert_optab_table[COI_satfract]) - #define satfractuns_optab (&convert_optab_table[COI_satfractuns]) -+#define vec_load_lanes_optab (&convert_optab_table[COI_vec_load_lanes]) -+#define vec_store_lanes_optab (&convert_optab_table[COI_vec_store_lanes]) +@@ -738,15 +1182,32 @@ + start_sequence (); - /* Contains the optab used for each rtx code. */ - extern optab code_to_optab[NUM_RTX_CODE + 1]; -@@ -794,6 +815,15 @@ - extern void emit_unop_insn (int, rtx, rtx, enum rtx_code); - extern bool maybe_emit_unop_insn (int, rtx, rtx, enum rtx_code); + for (i = 0; i < last_stage; i++) +- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg); ++ duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg); -+/* Find a widening optab even if it doesn't widen as much as we want. */ -+#define find_widening_optab_handler(A,B,C,D) \ -+ find_widening_optab_handler_and_mode (A, B, C, D, NULL) -+extern enum insn_code find_widening_optab_handler_and_mode (optab, -+ enum machine_mode, -+ enum machine_mode, -+ int, -+ enum machine_mode *); + /* Put the epilogue on the exit edge. */ + gcc_assert (single_exit (loop)); + e = single_exit (loop); + split_edge_and_insert (e, get_insns ()); ++ if (!flag_resched_modulo_sched) ++ e->dest->flags |= BB_DISABLE_SCHEDULE; + - /* An extra flag to control optab_for_tree_code's behavior. This is needed to - distinguish between machines with a vector shift that takes a scalar for the - shift amount vs. machines that take a vector for the shift amount. 
*/ -@@ -869,6 +899,23 @@ - + (int) CODE_FOR_nothing); + end_sequence (); } -+/* Like optab_handler, but for widening_operations that have a TO_MODE and -+ a FROM_MODE. */ -+ -+static inline enum insn_code -+widening_optab_handler (optab op, enum machine_mode to_mode, -+ enum machine_mode from_mode) ++/* Mark LOOP as software pipelined so the later ++ scheduling passes don't touch it. */ ++static void ++mark_loop_unsched (struct loop *loop) +{ -+ if (to_mode == from_mode || from_mode == VOIDmode) -+ return optab_handler (op, to_mode); ++ unsigned i; ++ basic_block *bbs = get_loop_body (loop); + -+ if (op->widening) -+ return (enum insn_code) (op->widening->handlers[(int) to_mode][(int) from_mode].insn_code -+ + (int) CODE_FOR_nothing); ++ for (i = 0; i < loop->num_nodes; i++) ++ bbs[i]->flags |= BB_DISABLE_SCHEDULE; + -+ return CODE_FOR_nothing; ++ free (bbs); +} + - /* Record that insn CODE should be used to implement mode MODE of OP. */ - - static inline void -@@ -877,6 +924,26 @@ - op->handlers[(int) mode].insn_code = (int) code - (int) CODE_FOR_nothing; - } + /* Return true if all the BBs of the loop are empty except the + loop header. */ + static bool +@@ -1009,10 +1470,10 @@ + continue; + } -+/* Like set_optab_handler, but for widening operations that have a TO_MODE -+ and a FROM_MODE. */ -+ -+static inline void -+set_widening_optab_handler (optab op, enum machine_mode to_mode, -+ enum machine_mode from_mode, enum insn_code code) -+{ -+ if (to_mode == from_mode) -+ set_optab_handler (op, to_mode, code); -+ else -+ { -+ if (op->widening == NULL) -+ op->widening = (struct widening_optab_handlers *) -+ xcalloc (1, sizeof (struct widening_optab_handlers)); -+ -+ op->widening->handlers[(int) to_mode][(int) from_mode].insn_code -+ = (int) code - (int) CODE_FOR_nothing; -+ } -+} -+ - /* Return the insn used to perform conversion OP from mode FROM_MODE - to mode TO_MODE; return CODE_FOR_nothing if the target does not have - such an insn. 
*/ ---- a/src/gcc/opts.c -+++ b/src/gcc/opts.c -@@ -823,6 +823,12 @@ - opts->x_flag_split_stack = 0; +- /* Don't handle BBs with calls or barriers, or !single_set insns, +- or auto-increment insns (to avoid creating invalid reg-moves +- for the auto-increment insns). +- ??? Should handle auto-increment insns. ++ /* Don't handle BBs with calls or barriers ++ or !single_set with the exception of instructions that include ++ count_reg---these instructions are part of the control part ++ that do-loop recognizes. + ??? Should handle insns defining subregs. */ + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) + { +@@ -1021,8 +1482,8 @@ + if (CALL_P (insn) + || BARRIER_P (insn) + || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) +- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE) +- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) ++ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE ++ && !reg_mentioned_p (count_reg, insn)) + || (INSN_P (insn) && (set = single_set (insn)) + && GET_CODE (SET_DEST (set)) == SUBREG)) + break; +@@ -1036,8 +1497,6 @@ + fprintf (dump_file, "SMS loop-with-call\n"); + else if (BARRIER_P (insn)) + fprintf (dump_file, "SMS loop-with-barrier\n"); +- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0) +- fprintf (dump_file, "SMS reg inc\n"); + else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) + fprintf (dump_file, "SMS loop-with-not-single-set\n"); +@@ -1049,7 +1508,11 @@ + continue; } - } -+ -+ /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion -+ is disabled. 
*/ -+ if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert) -+ maybe_set_param_value (PARAM_MAX_STORES_TO_SINK, 0, -+ opts->x_param_values, opts_set->x_param_values); - } - - #define LEFT_COLUMN 27 ---- a/src/gcc/params.def -+++ b/src/gcc/params.def -@@ -344,6 +344,11 @@ - "sms-max-ii-factor", - "A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop", - 100, 0, 0) -+/* The minimum value of stage count that swing modulo scheduler will generate. */ -+DEFPARAM(PARAM_SMS_MIN_SC, -+ "sms-min-sc", -+ "The minimum value of stage count that swing modulo scheduler will generate.", -+ 2, 1, 1) - DEFPARAM(PARAM_SMS_DFA_HISTORY, - "sms-dfa-history", - "The number of cycles the swing modulo scheduler considers when checking conflicts using DFA", -@@ -883,6 +888,13 @@ - "name lookup fails", - 1000, 0, 0) - -+/* Maximum number of conditional store pairs that can be sunk. */ -+DEFPARAM (PARAM_MAX_STORES_TO_SINK, -+ "max-stores-to-sink", -+ "Maximum number of conditional store pairs that can be sunk", -+ 2, 0, 0) -+ -+ - /* - Local variables: - mode:c ---- a/src/gcc/params.h -+++ b/src/gcc/params.h -@@ -206,4 +206,6 @@ - PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO) - #define MIN_NONDEBUG_INSN_UID \ - PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID) -+#define MAX_STORES_TO_SINK \ -+ PARAM_VALUE (PARAM_MAX_STORES_TO_SINK) - #endif /* ! GCC_PARAMS_H */ ---- a/src/gcc/po/ChangeLog -+++ b/src/gcc/po/ChangeLog -@@ -1,3 +1,7 @@ -+2011-10-30 Joseph Myers -+ -+ * ja.po: Update. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/gcc/po/ja.po -+++ b/src/gcc/po/ja.po -@@ -20,7 +20,7 @@ - "Project-Id-Version: gcc 4.6.1\n" - "Report-Msgid-Bugs-To: http://gcc.gnu.org/bugs.html\n" - "POT-Creation-Date: 2011-06-21 10:27+0000\n" --"PO-Revision-Date: 2011-10-25 22:36+0900\n" -+"PO-Revision-Date: 2011-10-30 18:48+0900\n" - "Last-Translator: Yasuaki Taniguchi \n" - "Language-Team: Japanese \n" - "Language: ja\n" -@@ -834,12 +834,12 @@ - #: gcov.c:420 - #, c-format - msgid " -a, --all-blocks Show information for every basic block\n" --msgstr "" -+msgstr " -a, --all-blocks 各基本ブロックに関する情報を表示する\n" - - #: gcov.c:421 - #, c-format - msgid " -b, --branch-probabilities Include branch probabilities in output\n" --msgstr "" -+msgstr " -b, --branch-probabilities 出力に分岐可能性情報を含める\n" - - #: gcov.c:422 - #, c-format -@@ -847,6 +847,8 @@ - " -c, --branch-counts Given counts of branches taken\n" - " rather than percentages\n" - msgstr "" -+" -c, --branch-counts 分岐に関する百分率では無く行われた\n" -+" 回数を取得する\n" - - #: gcov.c:424 - #, c-format -@@ -859,21 +861,23 @@ - " -l, --long-file-names Use long output file names for included\n" - " source files\n" - msgstr "" -+" -l, --long-file-names インクルードされたソースファイルに関する長い\n" -+" 出力ファイル名を使用する\n" - - #: gcov.c:427 - #, c-format - msgid " -f, --function-summaries Output summaries for each function\n" --msgstr "" -+msgstr " -f, --function-summaries 各関数に関する要約を出力する\n" - - #: gcov.c:428 - #, c-format - msgid " -o, --object-directory DIR|FILE Search for object files in DIR or called FILE\n" --msgstr "" -+msgstr " -o, --object-directory DIR|FILE オブジェクトファイルを DIR 内または呼び出し用 FILE 内で検索する\n" - - #: gcov.c:429 - #, c-format - msgid " -p, --preserve-paths Preserve all pathname components\n" --msgstr "" -+msgstr " -p, --preserve-paths すべてのパス名要素を保護する\n" - - #: gcov.c:430 - #, c-format -@@ -977,7 +981,7 @@ - #: gcov.c:1045 - #, c-format - msgid "%s:cannot open data file, assuming not executed\n" --msgstr "" -+msgstr "%s:データファイルを開けません。実行されていないと見なします\n" - - #: gcov.c:1052 - #, c-format -@@ -1027,7 
+1031,7 @@ - #: gcov.c:1379 - #, c-format - msgid "%s:graph is unsolvable for '%s'\n" --msgstr "" -+msgstr "%s: '%s' 用のグラフが解決できません\n" - - #: gcov.c:1459 - #, c-format -@@ -1037,7 +1041,7 @@ - #: gcov.c:1462 - #, c-format - msgid "Lines executed:%s of %d\n" --msgstr "" -+msgstr "実行された行:%s of %d\n" - - #: gcov.c:1466 - #, c-format -@@ -1047,7 +1051,7 @@ - #: gcov.c:1472 - #, c-format - msgid "Branches executed:%s of %d\n" --msgstr "" -+msgstr "実行された分岐:%s of %d\n" - - #: gcov.c:1476 - #, c-format -@@ -1057,12 +1061,12 @@ - #: gcov.c:1482 - #, c-format - msgid "No branches\n" --msgstr "" -+msgstr "分岐がありません\n" - - #: gcov.c:1484 - #, c-format - msgid "Calls executed:%s of %d\n" --msgstr "" -+msgstr "実行された呼び出し:%s of %d\n" - - #: gcov.c:1488 - #, c-format -@@ -1075,24 +1079,24 @@ - msgstr "%s: '%s' に対する行がありません\n" - - #: gcov.c:1843 --#, fuzzy, c-format -+#, c-format - msgid "call %2d returned %s\n" --msgstr "呼び出し %d の戻り = %d\n" -+msgstr "" - - #: gcov.c:1848 --#, fuzzy, c-format -+#, c-format - msgid "call %2d never executed\n" --msgstr "呼び出し %d は一度も実行せず\n" -+msgstr "" - - #: gcov.c:1853 --#, fuzzy, c-format -+#, c-format - msgid "branch %2d taken %s%s\n" --msgstr "ブランチ %d 受理 = %d%%\n" -+msgstr "" - - #: gcov.c:1857 --#, fuzzy, c-format -+#, c-format - msgid "branch %2d never executed\n" --msgstr "ブランチ %d は一度も実行されず\n" -+msgstr "" - - #: gcov.c:1862 - #, c-format -@@ -1100,9 +1104,9 @@ - msgstr "" - - #: gcov.c:1865 --#, fuzzy, c-format -+#, c-format - msgid "unconditional %2d never executed\n" --msgstr "呼び出し %d は一度も実行せず\n" -+msgstr "" - - #: gcov.c:1901 - #, c-format -@@ -1412,11 +1416,11 @@ - - #: opts.c:1183 - msgid "The following options take separate arguments" --msgstr "" -+msgstr "次のオプションは分離した引数を取ります" - - #: opts.c:1185 - msgid "The following options take joined arguments" --msgstr "" -+msgstr "次のオプションは結合した引数を取ります" - - #: opts.c:1196 - msgid "The following options are language-related" -@@ -1472,7 +1476,7 @@ - #: targhooks.c:1469 - #, c-format - msgid "created and 
used with differing settings of '%s'" --msgstr "" -+msgstr "作成時と使用時で '%s' の設定が異なります" - - #: targhooks.c:1471 - msgid "out of memory" -@@ -1480,11 +1484,11 @@ - - #: targhooks.c:1486 - msgid "created and used with different settings of -fpic" --msgstr "" -+msgstr "作成時と使用時で -fpic の設定が異なります" - - #: targhooks.c:1488 - msgid "created and used with different settings of -fpie" --msgstr "" -+msgstr "作成時と使用時で -fpie の設定が異なります" - - #: tlink.c:386 - #, c-format -@@ -1717,11 +1721,11 @@ - - #: params.def:100 - msgid "The maximum depth of recursive inlining for inline functions" --msgstr "" -+msgstr "インライン関数を再帰的にインライン化する時の最大深度" - - #: params.def:105 - msgid "The maximum depth of recursive inlining for non-inline functions" --msgstr "" -+msgstr "非インライン関数を再帰的にインライン化する時の最大深度" - - #: params.def:110 - msgid "Inline recursively only when the probability of call being executed exceeds the parameter" -@@ -1761,16 +1765,15 @@ - - #: params.def:180 - msgid "The size of function body to be considered large" --msgstr "" -+msgstr "大きいと見なされる関数本体のサイズ" - - #: params.def:184 - msgid "Maximal growth due to inlining of large function (in percent)" - msgstr "" - - #: params.def:188 --#, fuzzy - msgid "The size of translation unit to be considered large" --msgstr "翻訳単位全体をファイルにダンプする" -+msgstr "大きいと見なされる翻訳単位のサイズ" - - #: params.def:192 - msgid "How much can given compilation unit grow because of the inlining (in percent)" -@@ -1786,20 +1789,19 @@ - - #: params.def:204 - msgid "The size of stack frame to be considered large" --msgstr "" -+msgstr "大きいと見なされるスタックフレームのサイズ" - - #: params.def:208 - msgid "Maximal stack frame growth due to inlining (in percent)" --msgstr "" -+msgstr "インライン化によって増加するスタックフレームの最大量 (百分率)" - - #: params.def:215 - msgid "The maximum amount of memory to be allocated by GCSE" --msgstr "" -+msgstr "GCSE によって配置されるメモリの最大量" - - #: params.def:222 --#, fuzzy - msgid "The maximum ratio of insertions to deletions of expressions in GCSE" --msgstr "RPTS 用の最大反復数を指定する" -+msgstr "" - - #: 
params.def:233 - msgid "The threshold ratio for performing partial redundancy elimination after reload" -@@ -1963,9 +1965,8 @@ - msgstr "" - - #: params.def:470 --#, fuzzy - msgid "Bound on number of iv uses in loop optimized in iv optimizations" --msgstr "目立たない、コストのかかる最適化を行なう" -+msgstr "" - - #: params.def:478 - msgid "If number of candidates in the set is smaller, we always try to remove unused ivs during its optimization" -@@ -2044,9 +2045,8 @@ - msgstr "" - - #: params.def:594 --#, fuzzy - msgid "The maximum number of iterations through CFG to extend regions" --msgstr "RPTS 用の最大反復数を指定する" -+msgstr "" - - #: params.def:599 - msgid "The maximum conflict delay for an insn to be considered for speculative motion" -@@ -2077,9 +2077,8 @@ - msgstr "" - - #: params.def:637 --#, fuzzy - msgid "The upper bound for sharing integer constants" --msgstr "`%s' の列挙値が整数定数ではありません" -+msgstr "整数定数を共有するための上限値" - - #: params.def:656 - msgid "Minimum number of virtual mappings to consider switching to full virtual renames" -@@ -2111,11 +2110,11 @@ - - #: params.def:714 - msgid "The number of insns executed before prefetch is completed" --msgstr "" -+msgstr "プリフェッチが完了する前に実行される命令数" - - #: params.def:721 - msgid "The number of prefetches that can run at the same time" --msgstr "" -+msgstr "同時に実行可能なプリフェッチの数" - - #: params.def:728 - msgid "The size of L1 cache" -@@ -2162,9 +2161,8 @@ - msgstr "" - - #: params.def:806 --#, fuzzy - msgid "maximum number of parameters in a SCoP" --msgstr "RPTS 用の最大反復数を指定する" -+msgstr "SCoP 内のパラメータの最大数" - - #: params.def:813 - msgid "maximum number of basic blocks per function to be analyzed by Graphite" -@@ -2597,9 +2595,9 @@ - msgstr "無効な %%E 値" - - #: config/alpha/alpha.c:5431 config/alpha/alpha.c:5479 --#, fuzzy, c-format -+#, c-format - msgid "unknown relocation unspec" --msgstr "不明な設定済コンストラクタ型です" -+msgstr "" - - #: config/alpha/alpha.c:5440 config/crx/crx.c:1119 - #: config/rs6000/rs6000.c:16490 config/spu/spu.c:1726 -@@ -3000,32 +2998,32 @@ - #: 
config/i386/i386.c:14106 config/i386/i386.c:14146 - #, c-format - msgid "operand is not a condition code, invalid operand code 'D'" --msgstr "" -+msgstr "被演算子は条件コードではありません。無効な被演算子コード 'D' です" - - #: config/i386/i386.c:14172 - #, c-format - msgid "operand is neither a constant nor a condition code, invalid operand code 'C'" --msgstr "" -+msgstr "被演算子は定数でも条件コードでもありません。無効な被演算子コード 'C' です" - - #: config/i386/i386.c:14182 - #, c-format - msgid "operand is neither a constant nor a condition code, invalid operand code 'F'" --msgstr "" -+msgstr "被演算子は定数でも条件コードでもありません。無効な被演算子コード 'F' です" - - #: config/i386/i386.c:14200 - #, c-format - msgid "operand is neither a constant nor a condition code, invalid operand code 'c'" --msgstr "" -+msgstr "被演算子は定数でも条件コードでもありません。無効な被演算子コード 'c' です" - - #: config/i386/i386.c:14210 - #, c-format - msgid "operand is neither a constant nor a condition code, invalid operand code 'f'" --msgstr "" -+msgstr "被演算子は定数でも条件コードでもありません。無効な被演算子コード 'f' です" - - #: config/i386/i386.c:14313 - #, c-format - msgid "operand is not a condition code, invalid operand code 'Y'" --msgstr "" -+msgstr "被演算子は条件コードではありません。無効な被演算子コード 'Y' です" - - #: config/i386/i386.c:14339 - #, c-format -@@ -3098,7 +3096,7 @@ - #: config/lm32/lm32.c:529 - #, c-format - msgid "only 0.0 can be loaded as an immediate" --msgstr "" -+msgstr "即値としてロードできるのは 0.0 のみです" - - #: config/lm32/lm32.c:599 - msgid "bad operand" -@@ -3138,15 +3136,15 @@ - - #: config/m32r/m32r.c:2290 - msgid "pre-increment address is not a register" --msgstr "" -+msgstr "前置増分アドレスがレジスタではありません" - - #: config/m32r/m32r.c:2297 - msgid "pre-decrement address is not a register" --msgstr "" -+msgstr "前置減分アドレスがレジスタではありません" - - #: config/m32r/m32r.c:2304 - msgid "post-increment address is not a register" --msgstr "" -+msgstr "後置増分アドレスがレジスタではありません" - - #: config/m32r/m32r.c:2380 config/m32r/m32r.c:2394 - #: config/rs6000/rs6000.c:25500 -@@ -3252,7 +3250,7 @@ - - #: config/mmix/mmix.c:1589 config/mmix/mmix.c:1719 - msgid "MMIX Internal: 
Expected a CONST_INT, not this" --msgstr "" -+msgstr "MMIX 内部: CONST_INT が予期されますが、異なっています" - - #: config/mmix/mmix.c:1668 - msgid "MMIX Internal: Bad value for 'm', not a CONST_INT" -@@ -3260,11 +3258,11 @@ - - #: config/mmix/mmix.c:1687 - msgid "MMIX Internal: Expected a register, not this" --msgstr "" -+msgstr "MMIX 内部: レジスタが予期されますが、異なっています" - - #: config/mmix/mmix.c:1697 - msgid "MMIX Internal: Expected a constant, not this" --msgstr "" -+msgstr "MMIX 内部: 定数が予期されますが、異なっています" - - #. We need the original here. - #: config/mmix/mmix.c:1781 -@@ -3301,7 +3299,7 @@ - - #: config/picochip/picochip.c:2983 config/picochip/picochip.c:3015 - msgid "Bad address, not (reg+disp):" --msgstr "" -+msgstr "誤ったアドレスです。 (reg+disp) ではありません:" - - #: config/picochip/picochip.c:3029 - msgid "Bad address, not register:" -@@ -3526,15 +3524,15 @@ - - #: config/sh/sh.c:9271 - msgid "created and used with different architectures / ABIs" --msgstr "" -+msgstr "作成時と使用時で アーキテクチャ/ABI が異なります" - - #: config/sh/sh.c:9273 - msgid "created and used with different ABIs" --msgstr "" -+msgstr "作成時と使用時で ABI が異なります" - - #: config/sh/sh.c:9275 - msgid "created and used with different endianness" --msgstr "" -+msgstr "作成時と使用時でエンディアンが異なります" - - #: config/sparc/sparc.c:7445 config/sparc/sparc.c:7451 - #, c-format -@@ -3617,7 +3615,7 @@ - #: config/vax/vax.c:427 - #, c-format - msgid "symbol with offset used in PIC mode" --msgstr "" -+msgstr "PIC モードで使用されるオフセット付きのシンボルです" - - #: config/vax/vax.c:513 - #, c-format -@@ -3837,19 +3835,19 @@ - msgstr "%s:%d:%d: ここから再帰的に実体化されました" - - #: cp/error.c:2913 --#, fuzzy, c-format -+#, c-format - msgid "%s:%d:%d: instantiated from here" - msgstr "%s:%d:%d: ここから実体化されました" - - #: cp/error.c:2918 - #, c-format - msgid "%s:%d: recursively instantiated from here" --msgstr "" -+msgstr "%s:%d: ここから再帰的に実体化されました" - - #: cp/error.c:2919 --#, fuzzy, c-format -+#, c-format - msgid "%s:%d: instantiated from here" --msgstr "%s:%d: ここで実体化されました\n" -+msgstr "%s:%d: ここから実体化されました" - - #: 
cp/error.c:2962 - #, c-format -@@ -4029,22 +4027,21 @@ - #: fortran/expr.c:607 - #, c-format - msgid "Constant expression required at %C" --msgstr "" -+msgstr "%C では定数式が要求されます" - - #: fortran/expr.c:610 - #, c-format - msgid "Integer expression required at %C" --msgstr "" -+msgstr "%C では整数式が要求されます" - - #: fortran/expr.c:615 --#, fuzzy, c-format -+#, c-format - msgid "Integer value too large in expression at %C" --msgstr "式の整数がオーバーフローしました" -+msgstr "%C の式内で整数値が大きすぎます" - - #: fortran/expr.c:3147 --#, fuzzy - msgid "array assignment" --msgstr "代入" -+msgstr "配列代入" - - #: fortran/gfortranspec.c:303 - #, c-format -@@ -4080,7 +4077,7 @@ - - #: fortran/io.c:551 - msgid "Unexpected element '%c' in format string at %L" --msgstr "" -+msgstr "予期しない要素 '%c' が書式文字列内 (位置 %L) にあります" - - #: fortran/io.c:553 - msgid "Unexpected end of format string" -@@ -4088,15 +4085,15 @@ - - #: fortran/io.c:554 - msgid "Zero width in format descriptor" --msgstr "" -+msgstr "幅 0 の書式記述子です" - - #: fortran/io.c:574 - msgid "Missing leading left parenthesis" --msgstr "" -+msgstr "前に左小括弧がありません" - - #: fortran/io.c:603 - msgid "Left parenthesis required after '*'" --msgstr "" -+msgstr "'*' の後には左小括弧が必要です" - - #: fortran/io.c:634 - msgid "Expected P edit descriptor" -@@ -4116,9 +4113,8 @@ - msgstr "" - - #: fortran/io.c:844 --#, fuzzy - msgid "E specifier not allowed with g0 descriptor" --msgstr "型指定子 `%s' は struct や class の後には使えません" -+msgstr "" - - #: fortran/io.c:914 - msgid "Positive exponent width required" -@@ -4362,9 +4358,8 @@ - msgstr "" - - #: fortran/resolve.c:6233 --#, fuzzy - msgid "End expression in DO loop" --msgstr "オペランドとして無効な式" -+msgstr "" - - #: fortran/resolve.c:6237 - msgid "Step expression in DO loop" -@@ -4564,7 +4559,7 @@ - #: java/jcf-dump.c:1148 - #, c-format - msgid " --extdirs PATH Set extensions directory path\n" --msgstr "" -+msgstr " --extdirs PATH 拡張のディレクトリパスを設定する\n" - - #: java/jcf-dump.c:1149 - #, c-format -@@ -4734,18 +4729,18 @@ - #: config/pa/pa-hpux11.h:111 
config/pa/pa64-hpux.h:30 config/pa/pa64-hpux.h:33 - #: config/pa/pa64-hpux.h:42 config/pa/pa64-hpux.h:45 - msgid "warning: consider linking with '-static' as system libraries with" --msgstr "" -+msgstr "警告: システムライブラリとリンクする時は '-static' を指定することを検討してください" - - #: config/pa/pa-hpux10.h:90 config/pa/pa-hpux10.h:93 config/pa/pa-hpux10.h:101 - #: config/pa/pa-hpux10.h:104 config/pa/pa-hpux11.h:109 - #: config/pa/pa-hpux11.h:112 config/pa/pa64-hpux.h:31 config/pa/pa64-hpux.h:34 - #: config/pa/pa64-hpux.h:43 config/pa/pa64-hpux.h:46 - msgid " profiling support are only provided in archive format" --msgstr "" -+msgstr " プロファイリングサポートは書庫フォーマット内でのみ提供されます" - - #: config/rs6000/darwin.h:99 - msgid " conflicting code gen style switches are used" --msgstr "" -+msgstr " 競合しているコード生成スタイルスイッチが使用されています" - - #: config/arm/arm.h:178 - msgid "-msoft-float and -mhard_float may not be used together" -@@ -4805,7 +4800,7 @@ - - #: config/i386/linux-unwind.h:186 - msgid "ax ; {int $0x80 | syscall" --msgstr "" -+msgstr "ax ; {int $0x80 | syscall" - - #: config/s390/tpf.h:120 - msgid "static is not supported on TPF-OS" -@@ -4869,7 +4864,7 @@ - - #: java/lang.opt:206 - msgid "--extdirs=\tSet the extension directory path" --msgstr "" -+msgstr "--extdirs=\t拡張のディレクトリパスを設定する" - - #: java/lang.opt:216 - msgid "Input file is a file with a list of filenames to compile" -@@ -5388,7 +5383,7 @@ - - #: config/frv/frv.opt:31 - msgid "Enable label alignment optimizations" --msgstr "" -+msgstr "ラベル整列最適化を有効にする" - - #: config/frv/frv.opt:35 - msgid "Dynamically allocate cc registers" -@@ -5452,7 +5447,7 @@ - - #: config/frv/frv.opt:116 - msgid "Enable use of GPREL for read-only data in FDPIC" --msgstr "" -+msgstr "FDPIC 内の読み取り専用データ用 GPREL の使用を有効にする" - - #: config/frv/frv.opt:120 config/rs6000/rs6000.opt:216 - #: config/pdp11/pdp11.opt:67 -@@ -5460,9 +5455,8 @@ - msgstr "ハードウェア浮動小数点を利用する" - - #: config/frv/frv.opt:124 config/bfin/bfin.opt:77 --#, fuzzy - msgid "Enable inlining of PLT in function calls" --msgstr 
"関数呼び出しの前後でレジスタの保存を有効にする" -+msgstr "関数呼び出し内で PLT のインライン化を有効にする" - - #: config/frv/frv.opt:128 - msgid "Enable PIC support for building libraries" -@@ -5478,7 +5472,7 @@ - - #: config/frv/frv.opt:140 - msgid "Use media instructions" --msgstr "" -+msgstr "media 命令を使用する" - - #: config/frv/frv.opt:144 - msgid "Use multiply add/subtract instructions" -@@ -5494,7 +5488,7 @@ - - #: config/frv/frv.opt:157 - msgid "Do not mark ABI switches in e_flags" --msgstr "" -+msgstr "e_flags 内の ABI スイッチをマークしない" - - #: config/frv/frv.opt:161 - msgid "Remove redundant membars" -@@ -5506,7 +5500,7 @@ - - #: config/frv/frv.opt:169 - msgid "Enable setting GPRs to the result of comparisons" --msgstr "" -+msgstr "比較結果を汎用レジスタに設定することを有効にする" - - #: config/frv/frv.opt:173 - msgid "Change the amount of scheduler lookahead" -@@ -5565,9 +5559,8 @@ - msgstr "" - - #: config/mn10300/mn10300.opt:56 --#, fuzzy - msgid "Allow gcc to generate LIW instructions" --msgstr "gcc が repeat/erepeat 命令を使用することを許可する" -+msgstr "gcc が LIW 命令を生成することを許可する" - - #: config/s390/tpf.opt:23 - msgid "Enable TPF-OS tracing code" -@@ -5640,11 +5633,11 @@ - - #: config/s390/s390.opt:91 - msgid "Warn if a function uses alloca or creates an array with dynamic size" --msgstr "" -+msgstr "関数で alloca を使用するか、または動的サイズの配列を作成した場合に、警告する" - - #: config/s390/s390.opt:95 - msgid "Warn if a single function's framesize exceeds the given framesize" --msgstr "" -+msgstr "一つの関数のフレームサイズが与えられたフレームサイズを超過する場合に警告する" - - #: config/s390/s390.opt:99 - msgid "z/Architecture" -@@ -5692,7 +5685,7 @@ - - #: config/ia64/ia64.opt:56 - msgid "gp is constant (but save/restore gp on indirect calls)" --msgstr "gp を定数とする(但、間接呼び出しでは gp を save/restore する)" -+msgstr "gp を定数とする(ただし、間接呼び出しでは gp を save/restore する)" - - #: config/ia64/ia64.opt:60 - msgid "Generate self-relocatable code" -@@ -5741,39 +5734,39 @@ - #: config/ia64/ia64.opt:107 config/spu/spu.opt:72 config/sh/sh.opt:258 - #: config/pa/pa.opt:51 - msgid "Specify range of registers to make fixed" --msgstr "" 
-+msgstr "固定するレジスタの範囲を指定する" - - #: config/ia64/ia64.opt:119 - msgid "Use data speculation before reload" --msgstr "" -+msgstr "reload 前にデータ投機を使用する" - - #: config/ia64/ia64.opt:123 - msgid "Use data speculation after reload" --msgstr "" -+msgstr "reload 後にデータ投機を使用する" - - #: config/ia64/ia64.opt:127 - msgid "Use control speculation" --msgstr "" -+msgstr "制御投機を使用する" - - #: config/ia64/ia64.opt:131 - msgid "Use in block data speculation before reload" --msgstr "" -+msgstr "reload 前にブロック内データ投機を使用する" - - #: config/ia64/ia64.opt:135 - msgid "Use in block data speculation after reload" --msgstr "" -+msgstr "reload 後にブロック内データ投機を使用する" - - #: config/ia64/ia64.opt:139 - msgid "Use in block control speculation" --msgstr "" -+msgstr "ブロック内制御投機を使用する" - - #: config/ia64/ia64.opt:143 - msgid "Use simple data speculation check" --msgstr "" -+msgstr "単純データ投機検査を使用する" - - #: config/ia64/ia64.opt:147 - msgid "Use simple data speculation check for control speculation" --msgstr "" -+msgstr "制御投機用の単純データ投機検査を使用する" - - #: config/ia64/ia64.opt:151 - msgid "If set, data speculative instructions will be chosen for schedule only if there are no other choices at the moment " -@@ -5789,7 +5782,7 @@ - - #: config/ia64/ia64.opt:163 - msgid "Place a stop bit after every cycle when scheduling" --msgstr "" -+msgstr "スケジューリング時の各サイクル後にストップビットを配置する" - - #: config/ia64/ia64.opt:167 - msgid "Assume that floating-point stores and loads are not likely to cause conflict when placed into one instruction group" -@@ -5805,7 +5798,7 @@ - - #: config/ia64/ia64.opt:179 - msgid "Don't generate checks for control speculation in selective scheduling" --msgstr "" -+msgstr "選択的スケジューリング内では制御投機用の検査を生成しない" - - #: config/ia64/vms_symvec_libgcc_s.opt:3 - msgid "! It would be better to auto-generate this file." 
-@@ -6109,7 +6102,7 @@ - - #: config/m68k/m68k.opt:160 config/bfin/bfin.opt:61 - msgid "Enable separate data segment" --msgstr "" -+msgstr "分離データセグメントを有効にする" - - #: config/m68k/m68k.opt:164 config/bfin/bfin.opt:57 - msgid "ID of shared library to build" -@@ -6149,7 +6142,7 @@ - - #: config/i386/mingw.opt:23 - msgid "Warn about none ISO msvcrt scanf/printf width extensions" --msgstr "" -+msgstr "非 ISO の msvcrt scanf/printf の幅拡張に関して警告する" - - #: config/i386/mingw.opt:27 - msgid "For nested functions on stack executable permission is set." -@@ -6201,7 +6194,7 @@ - - #: config/i386/i386.opt:114 - msgid "Data greater than given threshold will go into .ldata section in x86-64 medium model" --msgstr "" -+msgstr "x86-64 メディアモデルでは与えられた閾値より大きいデータを .ldata セクションに配置する" - - #: config/i386/i386.opt:118 - msgid "Use given x86-64 code model" -@@ -6217,16 +6210,15 @@ - - #: config/i386/i386.opt:129 - msgid "Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack" --msgstr "" -+msgstr "スタックを再整列するために動的再整列引数ポインタ (Dynamic Realigned Argument Pointer, DRAP) を常に使用する" - - #: config/i386/i386.opt:133 - msgid "Return values of functions in FPU registers" - msgstr "FPU レジスタ内の機能の値を返す" - - #: config/i386/i386.opt:137 --#, fuzzy - msgid "Generate floating point mathematics using given instruction set" --msgstr "ハードウェア浮動小数点命令を使用する" -+msgstr "与えられた命令集合を使用して浮動小数数値計算を生成する" - - #: config/i386/i386.opt:149 - msgid "Inline all known string operations" -@@ -6314,8 +6306,9 @@ - msgstr "8 バイトベクトルをメモリに返す" - - #: config/i386/i386.opt:253 -+#, fuzzy - msgid "Generate reciprocals instead of divss and sqrtss." --msgstr "" -+msgstr "divss および sqrtss の代わりに逆数 (reciprocal) を生成する" - - #: config/i386/i386.opt:257 - msgid "Generate cld instruction in the function prologue." -@@ -6331,7 +6324,7 @@ - - #: config/i386/i386.opt:271 - msgid "Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer." 
--msgstr "" -+msgstr "自動ベクトル化で 256 ビット AVX 命令の代わりに 128 ビット AVX 命令を使用する" - - #: config/i386/i386.opt:277 - msgid "Generate 32bit i386 code" -@@ -6382,9 +6375,8 @@ - msgstr "SSE4.1 と SSE4.2 の組み込み関数とコード生成をサポートしない" - - #: config/i386/i386.opt:328 --#, fuzzy - msgid "%<-msse5%> was removed" --msgstr "'-msse5' は削除されました" -+msgstr "%<-msse5%> は削除されました" - - #: config/i386/i386.opt:333 - msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 and AVX built-in functions and code generation" -@@ -6544,7 +6536,7 @@ - - #: config/rs6000/rs6000.opt:152 - msgid "Use PowerPC General Purpose group optional instructions" --msgstr "PowerPC 一般用途グループオプション命令を使用する" -+msgstr "PowerPC 汎用グループオプション命令を使用する" - - #: config/rs6000/rs6000.opt:156 - msgid "Use PowerPC Graphics group optional instructions" -@@ -7002,7 +6994,7 @@ - - #: config/mcore/mcore.opt:56 config/fr30/fr30.opt:27 - msgid "Assume that run-time support has been provided, so omit -lsim from the linker command line" --msgstr "" -+msgstr "実行時サポートが提供されると見なし、リンカコマンドラインに -lsim を含めない" - - #: config/mcore/mcore.opt:60 - msgid "Use arbitrary sized immediates in bit operations" -@@ -7014,7 +7006,7 @@ - - #: config/mcore/mcore.opt:71 - msgid "Set the maximum amount for a single stack increment operation" --msgstr "単一のスタックインクリメント操作の最大値を設定する" -+msgstr "単一のスタック増分操作の最大値を設定する" - - #: config/mcore/mcore.opt:75 - msgid "Always treat bitfields as int-sized" -@@ -7234,7 +7226,7 @@ - - #: config/sh/sh.opt:246 - msgid "Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp, call-div1, call-fp, call-table" --msgstr "" -+msgstr "除算戦略、次のいずれか: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp, call-div1, call-fp, call-table" - - #: config/sh/sh.opt:250 - msgid "Specify name for 32 bit signed division function" -@@ -7282,7 +7274,7 @@ - - #: config/sh/sh.opt:298 - msgid "Mark MAC register as call-clobbered" --msgstr "" -+msgstr "MAC レジスタを呼び出しで破壊されるとマークする" - - #: 
config/sh/sh.opt:304 - msgid "Make structs a multiple of 4 bytes (warning: ABI altered)" -@@ -7960,7 +7952,7 @@ - - #: config/m68hc11/m68hc11.opt:49 - msgid "Auto pre/post decrement increment allowed" --msgstr "自動 pre/post デクリメント インクリメントを許容する" -+msgstr "自動 前置/後置 減分/増分 を許容する" - - #: config/m68hc11/m68hc11.opt:53 - msgid "Min/max instructions allowed" -@@ -7972,7 +7964,7 @@ - - #: config/m68hc11/m68hc11.opt:61 - msgid "Auto pre/post decrement increment not allowed" --msgstr "自動 pre/post デクリメント インクリメントを許容しない" -+msgstr "自動 前置/後置 減分/増分を許容しない" - - #: config/m68hc11/m68hc11.opt:65 - msgid "Use jsr and rts for function calls and returns" -@@ -8346,7 +8338,7 @@ - - #: config/bfin/bfin.opt:69 - msgid "Link with the fast floating-point library" --msgstr "" -+msgstr "高速な浮動小数ライブラリとリンクする" - - #: config/bfin/bfin.opt:81 - msgid "Do stack checking using bounds in L1 scratch memory" -@@ -8382,7 +8374,7 @@ - - #: config/picochip/picochip.opt:31 - msgid "Specify whether the byte access instructions should be used. Enabled by default." --msgstr "" -+msgstr "バイトアクセス命令を使用するかどうかを指定する。デフォルトでは有効となる" - - #: config/picochip/picochip.opt:35 - msgid "Enable debug output to be generated." -@@ -8390,11 +8382,11 @@ - - #: config/picochip/picochip.opt:39 - msgid "Allow a symbol value to be used as an immediate value in an instruction." --msgstr "" -+msgstr "命令内でシンボル値が即値として使用されることを許可する" - - #: config/picochip/picochip.opt:43 - msgid "Generate warnings when inefficient code is known to be generated." 
--msgstr "" -+msgstr "非効率なコードが生成された時に警告する" - - #: config/vxworks.opt:36 - msgid "Assume the VxWorks RTP environment" -@@ -8418,7 +8410,7 @@ - - #: config/darwin.opt:205 - msgid "Warn if constant CFString objects contain non-portable characters" --msgstr "" -+msgstr "定数 CFString オブジェクトが移植性の無い文字を含む場合に警告する" - - #: config/darwin.opt:210 - msgid "Generate AT&T-style stubs for Mach-O" -@@ -8430,7 +8422,7 @@ - - #: config/darwin.opt:218 - msgid "Generate code suitable for fast turn around debugging" --msgstr "" -+msgstr "デバッグを高速に行うために適したコードを生成する" - - #: config/darwin.opt:227 - msgid "The earliest MacOS X version on which this program will run" -@@ -8442,15 +8434,15 @@ - - #: config/darwin.opt:235 - msgid "Generate code for darwin loadable kernel extensions" --msgstr "" -+msgstr "darwin ロード可能カーネル拡張用のコードを生成する" - - #: config/darwin.opt:239 - msgid "Generate code for the kernel or loadable kernel extensions" --msgstr "" -+msgstr "カーネル用、またはロード可能カーネル拡張用のコードを生成する" - - #: config/darwin.opt:243 - msgid "-iframework \tAdd to the end of the system framework include path" --msgstr "" -+msgstr "-iframework \t をシステムフレームワークインクルードパスの末尾に加える" - - #: config/lynx.opt:23 - msgid "Support legacy multi-threading" -@@ -8737,7 +8729,6 @@ - msgstr "" - - #: config/microblaze/microblaze.opt:92 --#, fuzzy - msgid "Use hardware floating point conversion instructions" - msgstr "ハードウェア浮動小数点変換命令を使用する" - -@@ -8862,7 +8853,7 @@ - - #: c-family/c.opt:249 - msgid "-MT \tAdd an unquoted target" --msgstr "" -+msgstr "-MT \tターゲット (引用符を付けない) を追加する" - - #: c-family/c.opt:253 - msgid "Do not generate #line directives" -@@ -8898,11 +8889,11 @@ - - #: c-family/c.opt:288 - msgid "Warn about C constructs that are not in the common subset of C and C++" --msgstr "" -+msgstr "C と C++ の共通部分集合では無い C 構文に関して警告する" - - #: c-family/c.opt:292 - msgid "Warn about C++ constructs whose meaning differs between ISO C++ 1998 and ISO C++ 200x" --msgstr "" -+msgstr "ISO C++ 1998 と ISO C++ 200x で意味が異なる C++ 構文に関して警告する" - - #: 
c-family/c.opt:296 - msgid "Warn about casts which discard qualifiers" -@@ -8962,7 +8953,7 @@ - - #: c-family/c.opt:352 - msgid "Warn about stray tokens after #elif and #endif" --msgstr "" -+msgstr "#elif および #endif の後にあるはぐれたトークンに関して警告する" - - #: c-family/c.opt:356 - msgid "Warn about comparison of different enum types" -@@ -8977,9 +8968,8 @@ - msgstr "浮動小数点数の等価比較に関して警告する" - - #: c-family/c.opt:372 --#, fuzzy - msgid "Warn about printf/scanf/strftime/strfmon format string anomalies" --msgstr "printf/scanf/strftime/strfmon 形式の変則的なものに関して警告する" -+msgstr "printf/scanf/strftime/strfmon 書式文字列異常に関して警告する" - - #: c-family/c.opt:376 - msgid "Warn if passing too many arguments to a function for its format string" -@@ -8998,9 +8988,8 @@ - msgstr "セキュリティ問題になる可能性がある書式関数に関して警告する" - - #: c-family/c.opt:392 --#, fuzzy - msgid "Warn about strftime formats yielding 2-digit years" --msgstr "strftime 形式が二桁で年を表している時の警告しない" -+msgstr "strftime 書式が 2 桁の年の場合に警告する" - - #: c-family/c.opt:396 - msgid "Warn about zero-length formats" -@@ -9409,7 +9398,7 @@ - - #: c-family/c.opt:823 - msgid "Don't emit dllexported inline functions unless needed" --msgstr "" -+msgstr "必要が無い限り dllexported インライン関数を発行しない" - - #: c-family/c.opt:830 - msgid "Allow implicit conversions between vectors with differing numbers of subparts and/or differing element types." 
-@@ -9501,7 +9490,7 @@ - - #: c-family/c.opt:942 - msgid "Generate run time type descriptor information" --msgstr "" -+msgstr "実行時型記述子情報を生成する" - - #: c-family/c.opt:946 - msgid "Use the same size for double as for float" -@@ -9889,19 +9878,16 @@ - msgstr "関数が __attribute__((pure)) の候補となりそうな場合に警告する" - - #: common.opt:608 --#, fuzzy - msgid "Warn about enumerated switches, with no default, missing a case" --msgstr "列挙定数の switch で case 指定が欠けているものに関して警告する" -+msgstr "列挙定数を使用した switch 文で default 文が無いか特定の case が無い場合に警告する" - - #: common.opt:612 --#, fuzzy - msgid "Warn about enumerated switches missing a \"default:\" statement" --msgstr "列挙定数の switch で case 指定が欠けているものに関して警告する" -+msgstr "列挙定数を使用した switch 文で \"default:\" 文が無い場合に警告する" - - #: common.opt:616 --#, fuzzy - msgid "Warn about all enumerated switches missing a specific case" --msgstr "列挙定数の switch で case 指定が欠けているものに関して警告する" -+msgstr "列挙定数を使用した switch 文で特定の case が無い場合に警告する" - - #: common.opt:620 - msgid "Do not suppress warnings from system headers" -@@ -10000,9 +9986,8 @@ - msgstr "自動増加/減少命令を生成する" - - #: common.opt:821 --#, fuzzy - msgid "Generate code to check bounds before indexing arrays" --msgstr "配列の添字と添字境界を検査するコードを生成する" -+msgstr "配列の添え字を使用する前に境界検査を行うコードを生成する" - - #: common.opt:825 - #, fuzzy -@@ -10043,7 +10028,7 @@ - - #: common.opt:864 - msgid "Looks for opportunities to reduce stack adjustments and stack references." 
--msgstr "" -+msgstr "スタック調整およびスタック参照を削減する機会を探す" - - #: common.opt:868 - msgid "Do not put uninitialized globals in the common section" -@@ -10058,18 +10043,16 @@ - msgstr "" - - #: common.opt:884 --#, fuzzy - msgid "Perform comparison elimination after register allocation has finished" --msgstr "グローバル共通部分式を除去する" -+msgstr "レジスタは位置が完了した後に比較の除去を行う" - - #: common.opt:888 - msgid "Do not perform optimizations increasing noticeably stack usage" --msgstr "" -+msgstr "スタック使用量を著しく増加させる最適化を行わない" - - #: common.opt:892 --#, fuzzy - msgid "Perform a register copy-propagation optimization pass" --msgstr "最適化過程のレジスタつけ変えを行なう" -+msgstr "" - - #: common.opt:896 - msgid "Perform cross-jumping optimization" -@@ -10101,7 +10084,7 @@ - - #: common.opt:928 - msgid "Map one directory name to another in debug information" --msgstr "" -+msgstr "デバッグ情報内のディレクトリー名を他のものにマップする" - - #: common.opt:934 - msgid "Defer popping functions args from stack until later" -@@ -10116,9 +10099,8 @@ - msgstr "無意味な null ポインタ検査を削除する" - - #: common.opt:946 --#, fuzzy - msgid "Try to convert virtual calls to direct ones." --msgstr "リンカが PIC 呼び出しを直接呼び出しに変更することを許可するように試みる" -+msgstr "仮想呼び出しを直接呼び出しに変換することを試みる" - - #: common.opt:950 - #, fuzzy -@@ -10137,7 +10119,7 @@ - - #: common.opt:978 - msgid "-fdump-final-insns=filename\tDump to filename the insns at the end of translation" --msgstr "" -+msgstr "-fdump-final-insns=filename\t翻訳終了時に filename へ命令をダンプする" - - #: common.opt:982 - msgid "-fdump-go-spec=filename\tWrite all declarations to file as Go code" -@@ -10173,7 +10155,7 @@ - - #: common.opt:1014 common.opt:1018 - msgid "Perform unused type elimination in debug info" --msgstr "" -+msgstr "デバッグ情報内で使用されていない型の除去を行う" - - #: common.opt:1022 - msgid "Do not suppress C++ class debug information." 
-@@ -10181,25 +10163,24 @@ - - #: common.opt:1026 - msgid "Generate debug information to support Identical Code Folding (ICF)" --msgstr "" -+msgstr "Identical Code Folding (ICF) をサポートするためのデバッグ情報を生成する" - - #: common.opt:1030 - msgid "Enable exception handling" - msgstr "例外処理を有効にする" - - #: common.opt:1034 --#, fuzzy - msgid "Perform a number of minor, expensive optimizations" --msgstr "目立たない、コストのかかる最適化を行なう" -+msgstr "多くの、目立たないがコストが高い最適化を行う" - - #: common.opt:1038 - msgid "-fexcess-precision=[fast|standard]\tSpecify handling of excess floating-point precision" --msgstr "" -+msgstr "-fexcess-precision=[fast|standard]\t余分な浮動小数点精度の取り扱いを指定する" - - #: common.opt:1041 - #, c-format - msgid "unknown excess precision style %qs" --msgstr "" -+msgstr "不明な余分な精度スタイル %qs です" - - #: common.opt:1054 - msgid "Assume no NaNs or infinities are generated" -@@ -10222,52 +10203,45 @@ - msgstr "" - - #: common.opt:1077 --#, fuzzy, c-format -+#, c-format - msgid "unknown floating point contraction style %qs" --msgstr "浮動小数点定数を TOC 内に配置する" -+msgstr "不明な浮動小数短縮形 %qs です" - - #: common.opt:1094 - msgid "Allow function addresses to be held in registers" - msgstr "関数アドレスをレジスタに持たせる事を許可する" - - #: common.opt:1098 --#, fuzzy - msgid "Place each function into its own section" --msgstr "各々の関数をそれ自身のセクションに配置する" -+msgstr "それぞれの関数をそれ自身のセクションに配置する" - - #: common.opt:1102 --#, fuzzy - msgid "Perform global common subexpression elimination" --msgstr "グローバル共通部分式を除去する" -+msgstr "大域共通部分式の除去を行う" - - #: common.opt:1106 --#, fuzzy - msgid "Perform enhanced load motion during global common subexpression elimination" --msgstr "グローバル共通部分式を除去する" -+msgstr "" - - #: common.opt:1110 --#, fuzzy - msgid "Perform store motion after global common subexpression elimination" --msgstr "グローバル共通部分式を除去する" -+msgstr "" - - #: common.opt:1114 - msgid "Perform redundant load after store elimination in global common subexpression" - msgstr "" - - #: common.opt:1119 --#, fuzzy - msgid "Perform global common subexpression elimination after 
register allocation" --msgstr "グローバル共通部分式を除去する" -+msgstr "レジスタ配置後に大域共通部分式の除去を行う" - - #: common.opt:1125 --#, fuzzy - msgid "Enable in and out of Graphite representation" --msgstr "自動テンプレート実体化を有効にする" -+msgstr "" - - #: common.opt:1129 --#, fuzzy - msgid "Enable Graphite Identity transformation" --msgstr "呼び出しグラフ情報を送出する" -+msgstr "" - - #: common.opt:1133 - msgid "Mark all loops as parallel" -@@ -10306,9 +10280,8 @@ - msgstr "" - - #: common.opt:1173 --#, fuzzy - msgid "Perform conversion of conditional jumps to conditional execution" --msgstr "条件的な実行への変更のための閾値を変更する" -+msgstr "" - - #: common.opt:1177 - msgid "Convert conditional jumps in innermost loops to branchless equivalents" -@@ -10376,13 +10349,12 @@ - msgstr "" - - #: common.opt:1253 --#, fuzzy - msgid "Discover pure and const functions" --msgstr "使われない仮想関数を切り捨てる" -+msgstr "純粋および定数関数を見つける" - - #: common.opt:1257 - msgid "Discover readonly and non addressable static variables" --msgstr "" -+msgstr "読み取り専用およびアドレス付けできない静的変数を見つける" - - #: common.opt:1261 - msgid "Perform matrix layout flattening and transposing based" -@@ -10415,9 +10387,8 @@ - msgstr "" - - #: common.opt:1305 --#, fuzzy - msgid "Share slots for saving different hard registers." --msgstr "引数レジスタにローカルのものを格納する" -+msgstr "" - - #: common.opt:1309 - msgid "Share stack slots for spilled pseudo-registers." -@@ -10621,19 +10592,19 @@ - - #: common.opt:1521 - msgid "Enable basic program profiling code" --msgstr "" -+msgstr "基本プログラムプロファイリングコードを有効にする" - - #: common.opt:1525 - msgid "Insert arc-based program profiling code" --msgstr "" -+msgstr "円弧ベースプログラムプロファイリングコードを挿入する" - - #: common.opt:1529 - msgid "Set the top-level directory for storing the profile data." 
--msgstr "" -+msgstr "プロファイルデータ保存用の最上位ディレクリーを設定する" - - #: common.opt:1534 - msgid "Enable correction of flow inconsistent profile data input" --msgstr "" -+msgstr "フロー一貫性が無いデータ入力の訂正を有効にする" - - #: common.opt:1538 - msgid "Enable common options for generating profile info for profile feedback directed optimizations" -@@ -10653,7 +10624,7 @@ - - #: common.opt:1554 - msgid "Insert code to profile values of expressions" --msgstr "" -+msgstr "式の値をプロファイルするためのコードを挿入する" - - #: common.opt:1561 - msgid "-frandom-seed=\tMake compile reproducible using " -@@ -11836,7 +11807,7 @@ - #: c-decl.c:1150 - #, gcc-internal-format - msgid "nested function %q+D declared but never defined" --msgstr "" -+msgstr "入れ子になった関数 %q+D が宣言されましたが定義されていません" - - #: c-decl.c:1162 - #, gcc-internal-format -@@ -11893,7 +11864,7 @@ - #: c-decl.c:1613 - #, gcc-internal-format - msgid "prototype for %q+D follows non-prototype definition" --msgstr "" -+msgstr "非プロトタイプ定義に続いて %q+D 用のプロトタイプがあります" - - #: c-decl.c:1628 - #, gcc-internal-format -@@ -11969,9 +11940,9 @@ - msgstr "%q+D が異なる型で再定義されました" - - #: c-decl.c:1818 --#, fuzzy, gcc-internal-format -+#, gcc-internal-format - msgid "redefinition of typedef %q+D with variably modified type" --msgstr "%q+D が異なる型で再定義されました" -+msgstr "" - - #: c-decl.c:1825 - #, gcc-internal-format -@@ -12016,17 +11987,17 @@ - #: c-decl.c:2001 - #, gcc-internal-format - msgid "extern declaration of %q+D follows declaration with no linkage" --msgstr "" -+msgstr "リンク無し宣言の後に %q+D の extern 宣言が続いています" - - #: c-decl.c:2037 - #, gcc-internal-format - msgid "declaration of %q+D with no linkage follows extern declaration" --msgstr "" -+msgstr "extern 宣言の後にリンク無し %q+D の宣言が続いています" - - #: c-decl.c:2043 - #, gcc-internal-format - msgid "redeclaration of %q+D with no linkage" --msgstr "" -+msgstr "リンク無し %q+D の再定義です" - - #: c-decl.c:2069 - #, gcc-internal-format -@@ -12354,7 +12325,7 @@ - #: c-decl.c:4614 - #, gcc-internal-format - msgid "defining a type in a compound literal is invalid in C++" 
--msgstr "" -+msgstr "複合リテラル内での型定義は C++ では無効です" - - #: c-decl.c:4666 c-decl.c:4681 - #, gcc-internal-format -@@ -13393,23 +13364,23 @@ - #: c-parser.c:1829 - #, gcc-internal-format - msgid "expression in static assertion is not an integer" --msgstr "" -+msgstr "静的アサーション内の式が整数ではありません" - - #: c-parser.c:1836 - #, gcc-internal-format - msgid "expression in static assertion is not an integer constant expression" --msgstr "" -+msgstr "静的アサーション内の式が整数定数式ではありません" - - #: c-parser.c:1841 - #, gcc-internal-format - msgid "expression in static assertion is not constant" --msgstr "" -+msgstr "静的アサーション内の式が定数ではありません" - - #. Report the error. - #: c-parser.c:1846 cp/semantics.c:4719 - #, gcc-internal-format - msgid "static assertion failed: %E" --msgstr "" -+msgstr "静的アサーションに失敗しました: %E" - - #: c-parser.c:2207 c-parser.c:3063 c-parser.c:3709 c-parser.c:3983 - #: c-parser.c:5084 c-parser.c:5175 c-parser.c:5800 c-parser.c:6083 -@@ -14125,12 +14096,12 @@ - #: c-typeck.c:3578 - #, gcc-internal-format - msgid "increment of enumeration value is invalid in C++" --msgstr "列挙値のインクリメントは C++ では無効です" -+msgstr "列挙値の増分は C++ では無効です" - - #: c-typeck.c:3581 - #, gcc-internal-format - msgid "decrement of enumeration value is invalid in C++" --msgstr "列挙値のデクリメントは C++ では無効です" -+msgstr "列挙値の減分は C++ では無効です" - - #: c-typeck.c:3594 - #, gcc-internal-format -@@ -14140,22 +14111,22 @@ - #: c-typeck.c:3613 c-typeck.c:3645 - #, gcc-internal-format - msgid "wrong type argument to increment" --msgstr "インクリメントする引数の型が間違っています" -+msgstr "増分する引数の型が間違っています" - - #: c-typeck.c:3615 c-typeck.c:3648 - #, gcc-internal-format - msgid "wrong type argument to decrement" --msgstr "デクリメントする引数の型が間違っています" -+msgstr "減分する引数の型が間違っています" - - #: c-typeck.c:3635 - #, gcc-internal-format - msgid "increment of pointer to unknown structure" --msgstr "不明な構造体へのポインタのインクリメントです" -+msgstr "不明な構造体へのポインタの増分です" - - #: c-typeck.c:3638 - #, gcc-internal-format - msgid "decrement of pointer to unknown structure" --msgstr "不明な構造体へのポインタのデクリメントです" 
-+msgstr "不明な構造体へのポインタの減分です" - - #: c-typeck.c:3722 - #, gcc-internal-format -@@ -14170,12 +14141,12 @@ - #: c-typeck.c:3895 c-family/c-common.c:8658 - #, gcc-internal-format - msgid "increment of read-only location %qE" --msgstr "読み取り専用位置 %qE のインクリメントです" -+msgstr "読み取り専用位置 %qE の増分です" - - #: c-typeck.c:3898 c-family/c-common.c:8659 - #, gcc-internal-format - msgid "decrement of read-only location %qE" --msgstr "読み取り専用位置 %qE のデクリメントです" -+msgstr "読み取り専用位置 %qE の減分です" - - #: c-typeck.c:3939 - #, gcc-internal-format -@@ -19787,12 +19758,12 @@ - #: c-family/c-common.c:8611 - #, gcc-internal-format - msgid "increment of member %qD in read-only object" --msgstr "読み取り専用オブジェクト内のメンバ %qD のインクリメントです" -+msgstr "読み取り専用オブジェクト内のメンバ %qD の増分です" - - #: c-family/c-common.c:8613 - #, gcc-internal-format - msgid "decrement of member %qD in read-only object" --msgstr "読み取り専用オブジェクト内のメンバ %qD のデクリメントです" -+msgstr "読み取り専用オブジェクト内のメンバ %qD の減分です" - - #: c-family/c-common.c:8615 - #, gcc-internal-format -@@ -19807,12 +19778,12 @@ - #: c-family/c-common.c:8620 - #, gcc-internal-format - msgid "increment of read-only member %qD" --msgstr "読み取り専用メンバ %qD のインクリメントです" -+msgstr "読み取り専用メンバ %qD の増分です" - - #: c-family/c-common.c:8621 - #, gcc-internal-format - msgid "decrement of read-only member %qD" --msgstr "読み取り専用メンバ %qD のデクリメントです" -+msgstr "読み取り専用メンバ %qD の減分です" - - #: c-family/c-common.c:8622 - #, gcc-internal-format -@@ -19827,12 +19798,12 @@ - #: c-family/c-common.c:8627 - #, gcc-internal-format - msgid "increment of read-only variable %qD" --msgstr "読み取り専用変数 %qD のインクリメントです" -+msgstr "読み取り専用変数 %qD の増分です" - - #: c-family/c-common.c:8628 - #, gcc-internal-format - msgid "decrement of read-only variable %qD" --msgstr "読み取り専用変数 %qD のデクリメントです" -+msgstr "読み取り専用変数 %qD の減分です" - - #: c-family/c-common.c:8629 - #, gcc-internal-format -@@ -19847,12 +19818,12 @@ - #: c-family/c-common.c:8633 - #, gcc-internal-format - msgid "increment of read-only parameter %qD" --msgstr "読み取り専用パラメータ %qD のインクリメントです" -+msgstr 
"読み取り専用パラメータ %qD の増分です" - - #: c-family/c-common.c:8634 - #, gcc-internal-format - msgid "decrement of read-only parameter %qD" --msgstr "読み取り専用パラメータ %qD のデクリメントです" -+msgstr "読み取り専用パラメータ %qD の減分です" - - #: c-family/c-common.c:8635 - #, gcc-internal-format -@@ -19867,12 +19838,12 @@ - #: c-family/c-common.c:8642 - #, gcc-internal-format - msgid "increment of read-only named return value %qD" --msgstr "読み取り専用名前付き戻り値 %qD のインクリメントです" -+msgstr "読み取り専用名前付き戻り値 %qD の増分です" - - #: c-family/c-common.c:8644 - #, gcc-internal-format - msgid "decrement of read-only named return value %qD" --msgstr "読み取り専用名前付き戻り値 %qD のデクリメントです" -+msgstr "読み取り専用名前付き戻り値 %qD の減分です" - - #: c-family/c-common.c:8646 - #, gcc-internal-format -@@ -19887,12 +19858,12 @@ - #: c-family/c-common.c:8652 - #, gcc-internal-format - msgid "increment of function %qD" --msgstr "関数 %qD のインクリメントです" -+msgstr "関数 %qD の増分です" - - #: c-family/c-common.c:8653 - #, gcc-internal-format - msgid "decrement of function %qD" --msgstr "関数 %qD のデクリメントです" -+msgstr "関数 %qD の減分です" - - #: c-family/c-common.c:8654 - #, gcc-internal-format -@@ -19912,12 +19883,12 @@ - #: c-family/c-common.c:8677 - #, gcc-internal-format - msgid "lvalue required as increment operand" --msgstr "インクリメントの被演算子として左辺値が必要です" -+msgstr "増分の被演算子として左辺値が必要です" - - #: c-family/c-common.c:8680 - #, gcc-internal-format - msgid "lvalue required as decrement operand" --msgstr "デクリメントの被演算子として左辺値が必要です" -+msgstr "減分の被演算子として左辺値が必要です" - - #: c-family/c-common.c:8683 - #, gcc-internal-format -@@ -20556,12 +20527,12 @@ - #: c-family/c-omp.c:375 cp/semantics.c:4431 - #, gcc-internal-format - msgid "missing increment expression" --msgstr "インクリメント式がありません" -+msgstr "増分式がありません" - - #: c-family/c-omp.c:444 cp/semantics.c:4287 - #, gcc-internal-format - msgid "invalid increment expression" --msgstr "無効なインクリメント式です" -+msgstr "無効な増分式です" - - #: c-family/c-opts.c:303 - #, gcc-internal-format -@@ -20571,7 +20542,7 @@ - #: c-family/c-opts.c:306 - #, gcc-internal-format - msgid "obsolete 
option -I- used, please use -iquote instead" --msgstr "廃止あれたオプション -I- が使用されています。代わりに -iquote を使用してください" -+msgstr "廃止されたオプション -I- が使用されています。代わりに -iquote を使用してください" - - #: c-family/c-opts.c:486 - #, gcc-internal-format ---- a/src/gcc/recog.c -+++ b/src/gcc/recog.c -@@ -930,7 +930,9 @@ - return ((GET_MODE (op) == VOIDmode || GET_MODE (op) == mode - || mode == VOIDmode) - && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) -- && LEGITIMATE_CONSTANT_P (op)); -+ && targetm.legitimate_constant_p (mode == VOIDmode -+ ? GET_MODE (op) -+ : mode, op)); - /* Except for certain constants with VOIDmode, already checked for, - OP's mode must match MODE if MODE specifies a mode. */ -@@ -1107,7 +1109,9 @@ - && (GET_MODE (op) == mode || mode == VOIDmode - || GET_MODE (op) == VOIDmode) - && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) -- && LEGITIMATE_CONSTANT_P (op)); -+ && targetm.legitimate_constant_p (mode == VOIDmode -+ ? GET_MODE (op) -+ : mode, op)); - } - - /* Returns 1 if OP is an operand that is a CONST_INT. */ ---- a/src/gcc/regcprop.c -+++ b/src/gcc/regcprop.c -@@ -418,10 +418,9 @@ - - offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0) - + (BYTES_BIG_ENDIAN ? 
byteoffset : 0)); -- return gen_rtx_raw_REG (new_mode, -- regno + subreg_regno_offset (regno, orig_mode, -- offset, -- new_mode)); -+ regno += subreg_regno_offset (regno, orig_mode, offset, new_mode); -+ if (HARD_REGNO_MODE_OK (regno, new_mode)) -+ return gen_rtx_raw_REG (new_mode, regno); - } - return NULL_RTX; - } ---- a/src/gcc/reload.c -+++ b/src/gcc/reload.c -@@ -1017,6 +1017,7 @@ - #ifdef CANNOT_CHANGE_MODE_CLASS - && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass) - #endif -+ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))] - && (CONSTANT_P (SUBREG_REG (in)) - || GET_CODE (SUBREG_REG (in)) == PLUS - || strict_low -@@ -1123,6 +1124,7 @@ - #ifdef CANNOT_CHANGE_MODE_CLASS - && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode, rclass) - #endif -+ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))] - && (CONSTANT_P (SUBREG_REG (out)) - || strict_low - || (((REG_P (SUBREG_REG (out)) -@@ -4721,7 +4723,8 @@ - simplify_gen_subreg (GET_MODE (x), reg_equiv_constant[regno], - GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); - gcc_assert (tem); -- if (CONSTANT_P (tem) && !LEGITIMATE_CONSTANT_P (tem)) -+ if (CONSTANT_P (tem) -+ && !targetm.legitimate_constant_p (GET_MODE (x), tem)) - { - tem = force_const_mem (GET_MODE (x), tem); - i = find_reloads_address (GET_MODE (tem), &tem, XEXP (tem, 0), -@@ -6049,7 +6052,7 @@ - enum reload_type type, int ind_levels) - { - if (CONSTANT_P (x) -- && (! LEGITIMATE_CONSTANT_P (x) -+ && (!targetm.legitimate_constant_p (mode, x) - || targetm.preferred_reload_class (x, rclass) == NO_REGS)) - { - x = force_const_mem (mode, x); -@@ -6059,7 +6062,7 @@ - - else if (GET_CODE (x) == PLUS - && CONSTANT_P (XEXP (x, 1)) -- && (! LEGITIMATE_CONSTANT_P (XEXP (x, 1)) -+ && (!targetm.legitimate_constant_p (GET_MODE (x), XEXP (x, 1)) - || targetm.preferred_reload_class (XEXP (x, 1), rclass) - == NO_REGS)) +- if (! 
(g = create_ddg (bb, 0))) ++ /* Always schedule the closing branch with the rest of the ++ instructions. The branch is rotated to be in row ii-1 at the ++ end of the scheduling procedure to make sure it's the last ++ instruction in the iteration. */ ++ if (! (g = create_ddg (bb, 1))) + { + if (dump_file) + fprintf (dump_file, "SMS create_ddg failed\n"); +@@ -1072,9 +1535,9 @@ { ---- a/src/gcc/reload1.c -+++ b/src/gcc/reload1.c -@@ -3899,6 +3899,10 @@ - if (XEXP (x, 0)) - set_label_offsets (XEXP (x, 0), NULL_RTX, 1); + rtx head, tail; + rtx count_reg, count_init; +- int mii, rec_mii; +- unsigned stage_count = 0; ++ int mii, rec_mii, stage_count, min_cycle; + HOST_WIDEST_INT loop_count = 0; ++ bool opt_sc_p; -+ for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1)) -+ if (XEXP (x, 0)) -+ set_label_offsets (XEXP (x, 0), NULL_RTX, 1); -+ - for_each_eh_label (set_initial_eh_label_offset); - } + if (! (g = g_arr[loop->num])) + continue; +@@ -1151,63 +1614,103 @@ + fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n", + rec_mii, mii, maxii); -@@ -4155,6 +4159,9 @@ - } - else if (function_invariant_p (x)) - { -+ enum machine_mode mode; +- /* After sms_order_nodes and before sms_schedule_by_order, to copy over +- ASAP. */ +- set_node_sched_params (g); +- +- ps = sms_schedule_by_order (g, mii, maxii, node_order); +- +- if (ps){ +- stage_count = PS_STAGE_COUNT (ps); +- gcc_assert(stage_count >= 1); +- } +- +- /* Stage count of 1 means that there is no interleaving between +- iterations, let the scheduling passes do the job. 
*/ +- if (stage_count <= 1 +- || (count_init && (loop_count <= stage_count)) +- || (flag_branch_probabilities && (trip_count <= stage_count))) ++ for (;;) + { +- if (dump_file) ++ set_node_sched_params (g); + -+ mode = GET_MODE (SET_DEST (set)); - if (GET_CODE (x) == PLUS) - { - /* This is PLUS of frame pointer and a constant, -@@ -4167,12 +4174,11 @@ - reg_equiv_invariant[i] = x; - num_eliminable_invariants++; - } -- else if (LEGITIMATE_CONSTANT_P (x)) -+ else if (targetm.legitimate_constant_p (mode, x)) - reg_equiv_constant[i] = x; - else - { -- reg_equiv_memory_loc[i] -- = force_const_mem (GET_MODE (SET_DEST (set)), x); -+ reg_equiv_memory_loc[i] = force_const_mem (mode, x); - if (! reg_equiv_memory_loc[i]) - reg_equiv_init[i] = NULL_RTX; - } -@@ -4474,6 +4480,43 @@ - } - } - } -+ -+/* *OP_PTR and *OTHER_PTR are two operands to a conceptual reload. -+ If *OP_PTR is a paradoxical subreg, try to remove that subreg -+ and apply the corresponding narrowing subreg to *OTHER_PTR. -+ Return true if the operands were changed, false otherwise. */ -+ -+static bool -+strip_paradoxical_subreg (rtx *op_ptr, rtx *other_ptr) -+{ -+ rtx op, inner, other, tem; -+ -+ op = *op_ptr; -+ if (GET_CODE (op) != SUBREG) -+ return false; ++ stage_count = 0; ++ opt_sc_p = false; ++ ps = sms_schedule_by_order (g, mii, maxii, node_order); ++ ++ if (ps) + { +- fprintf (dump_file, "SMS failed... \n"); +- fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count); +- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); +- fprintf (dump_file, ", trip-count="); +- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); +- fprintf (dump_file, ")\n"); +- } +- continue; +- } +- else +- { +- struct undo_replace_buff_elem *reg_move_replaces; ++ /* Try to achieve optimized SC by normalizing the partial ++ schedule (having the cycles start from cycle zero). ++ The branch location must be placed in row ii-1 in the ++ final scheduling. 
If failed, shift all instructions to ++ position the branch in row ii-1. */ ++ opt_sc_p = optimize_sc (ps, g); ++ if (opt_sc_p) ++ stage_count = calculate_stage_count (ps, 0); ++ else ++ { ++ /* Bring the branch to cycle ii-1. */ ++ int amount = (SCHED_TIME (g->closing_branch->cuid) ++ - (ps->ii - 1)); ++ ++ if (dump_file) ++ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n"); + +- if (dump_file) ++ stage_count = calculate_stage_count (ps, amount); ++ } ++ ++ gcc_assert (stage_count >= 1); ++ } ++ ++ /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of ++ 1 means that there is no interleaving between iterations thus ++ we let the scheduling passes do the job in this case. */ ++ if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC) ++ || (count_init && (loop_count <= stage_count)) ++ || (flag_branch_probabilities && (trip_count <= stage_count))) + { +- fprintf (dump_file, +- "SMS succeeded %d %d (with ii, sc)\n", ps->ii, +- stage_count); +- print_partial_schedule (ps, dump_file); +- fprintf (dump_file, +- "SMS Branch (%d) will later be scheduled at cycle %d.\n", +- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); ++ if (dump_file) ++ { ++ fprintf (dump_file, "SMS failed... \n"); ++ fprintf (dump_file, "SMS sched-failed (stage-count=%d," ++ " loop-count=", stage_count); ++ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count); ++ fprintf (dump_file, ", trip-count="); ++ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count); ++ fprintf (dump_file, ")\n"); ++ } ++ break; + } + +- /* Set the stage boundaries. If the DDG is built with closing_branch_deps, +- the closing_branch was scheduled and should appear in the last (ii-1) +- row. Otherwise, we are free to schedule the branch, and we let nodes +- that were scheduled at the first PS_MIN_CYCLE cycle appear in the first +- row; this should reduce stage_count to minimum. 
+- TODO: Revisit the issue of scheduling the insns of the +- control part relative to the branch when the control part +- has more than one insn. */ +- normalize_sched_times (ps); +- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); ++ if (!opt_sc_p) ++ { ++ /* Rotate the partial schedule to have the branch in row ii-1. */ ++ int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); ++ ++ reset_sched_times (ps, amount); ++ rotate_partial_schedule (ps, amount); ++ } ++ + set_columns_for_ps (ps); + ++ min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); ++ if (!schedule_reg_moves (ps)) ++ { ++ mii = ps->ii + 1; ++ free_partial_schedule (ps); ++ continue; ++ } + -+ inner = SUBREG_REG (op); -+ if (GET_MODE_SIZE (GET_MODE (op)) <= GET_MODE_SIZE (GET_MODE (inner))) -+ return false; ++ /* Moves that handle incoming values might have been added ++ to a new first stage. Bump the stage count if so. + -+ other = *other_ptr; -+ tem = gen_lowpart_common (GET_MODE (inner), other); -+ if (!tem) -+ return false; ++ ??? Perhaps we could consider rotating the schedule here ++ instead? */ ++ if (PS_MIN_CYCLE (ps) < min_cycle) ++ { ++ reset_sched_times (ps, 0); ++ stage_count++; ++ } + -+ /* If the lowpart operation turned a hard register into a subreg, -+ rather than simplifying it to another hard register, then the -+ mode change cannot be properly represented. For example, OTHER -+ might be valid in its current mode, but not in the new one. */ -+ if (GET_CODE (tem) == SUBREG -+ && REG_P (other) -+ && HARD_REGISTER_P (other)) -+ return false; ++ /* The stage count should now be correct without rotation. */ ++ gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); ++ PS_STAGE_COUNT (ps) = stage_count; + -+ *op_ptr = inner; -+ *other_ptr = tem; -+ return true; -+} - - /* A subroutine of reload_as_needed. 
If INSN has a REG_EH_REGION note, - examine all of the reload insns between PREV and NEXT exclusive, and -@@ -5554,7 +5597,7 @@ - chain reloads or do need an intermediate hard registers. */ - bool result = true; - int regno, n, code; -- rtx out, in, tem, insn; -+ rtx out, in, insn; - rtx last = get_last_insn (); + canon_loop (loop); - /* Make r2 a component of r1. */ -@@ -5573,11 +5616,7 @@ ++ if (dump_file) ++ { ++ fprintf (dump_file, ++ "%s:%d SMS succeeded %d %d (with ii, sc)\n", ++ insn_file (tail), insn_line (tail), ps->ii, stage_count); ++ print_partial_schedule (ps, dump_file); ++ } ++ + /* case the BCT count is not known , Do loop-versioning */ + if (count_reg && ! count_init) + { +@@ -1230,23 +1733,23 @@ + permute_partial_schedule (ps, g->closing_branch->first_note); - /* If IN is a paradoxical SUBREG, remove it and try to put the - opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ -- if (GET_CODE (in) == SUBREG -- && (GET_MODE_SIZE (GET_MODE (in)) -- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) -- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) -- in = SUBREG_REG (in), out = tem; -+ strip_paradoxical_subreg (&in, &out); + /* Mark this loop as software pipelined so the later +- scheduling passes doesn't touch it. */ ++ scheduling passes don't touch it. */ + if (! flag_resched_modulo_sched) +- g->bb->flags |= BB_DISABLE_SCHEDULE; ++ mark_loop_unsched (loop); ++ + /* The life-info is not valid any more. */ + df_set_bb_dirty (g->bb); - if (GET_CODE (in) == PLUS - && (REG_P (XEXP (in, 0)) -@@ -6449,6 +6488,8 @@ +- reg_move_replaces = generate_reg_moves (ps, true); ++ apply_reg_moves (ps); + if (dump_file) +- print_node_sched_params (dump_file, g->num_nodes, g); ++ print_node_sched_params (dump_file, g->num_nodes, ps); + /* Generate prolog and epilog. 
*/ + generate_prolog_epilog (ps, loop, count_reg, count_init); +- +- free_undo_replace_buff (reg_move_replaces); ++ break; + } - if (regno >= 0 - && reg_last_reload_reg[regno] != 0 -+ && (GET_MODE_SIZE (GET_MODE (reg_last_reload_reg[regno])) -+ >= GET_MODE_SIZE (mode) + byte) - #ifdef CANNOT_CHANGE_MODE_CLASS - /* Verify that the register it's in can be used in - mode MODE. */ -@@ -6460,24 +6501,12 @@ - { - enum reg_class rclass = rld[r].rclass, last_class; - rtx last_reg = reg_last_reload_reg[regno]; -- enum machine_mode need_mode; + free_partial_schedule (ps); +- free (node_sched_params); ++ VEC_free (node_sched_params, heap, node_sched_param_vec); + free (node_order); + free_ddg (g); + } +@@ -1347,19 +1850,21 @@ + scheduling window is empty and zero otherwise. */ - i = REGNO (last_reg); - i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode); - last_class = REGNO_REG_CLASS (i); + static int +-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i, +- sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p) ++get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, ++ sbitmap sched_nodes, int ii, int *start_p, int *step_p, ++ int *end_p) + { + int start, step, end; ++ int early_start, late_start; + ddg_edge_ptr e; +- int u = nodes_order [i]; +- ddg_node_ptr u_node = &ps->g->nodes[u]; + sbitmap psp = sbitmap_alloc (ps->g->num_nodes); + sbitmap pss = sbitmap_alloc (ps->g->num_nodes); + sbitmap u_node_preds = NODE_PREDECESSORS (u_node); + sbitmap u_node_succs = NODE_SUCCESSORS (u_node); + int psp_not_empty; + int pss_not_empty; ++ int count_preds; ++ int count_succs; -- if (byte == 0) -- need_mode = mode; -- else -- need_mode -- = smallest_mode_for_size -- (GET_MODE_BITSIZE (mode) + byte * BITS_PER_UNIT, -- GET_MODE_CLASS (mode) == MODE_PARTIAL_INT -- ? MODE_INT : GET_MODE_CLASS (mode)); + /* 1. compute sched window for u (start, end, step). 
*/ + sbitmap_zero (psp); +@@ -1367,214 +1872,119 @@ + psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes); + pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes); + +- if (psp_not_empty && !pss_not_empty) +- { +- int early_start = INT_MIN; - -- if ((GET_MODE_SIZE (GET_MODE (last_reg)) -- >= GET_MODE_SIZE (need_mode)) -- && reg_reloaded_contents[i] == regno -+ if (reg_reloaded_contents[i] == regno - && TEST_HARD_REG_BIT (reg_reloaded_valid, i) - && HARD_REGNO_MODE_OK (i, rld[r].mode) - && (TEST_HARD_REG_BIT (reg_class_contents[(int) rclass], i) -@@ -7579,7 +7608,6 @@ - if (tertiary_icode != CODE_FOR_nothing) - { - rtx third_reloadreg = rld[tertiary_reload].reg_rtx; -- rtx tem; +- end = INT_MAX; +- for (e = u_node->in; e != 0; e = e->next_in) +- { +- ddg_node_ptr v_node = e->src; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge: "); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_not_empty," +- " checking p %d (%d): ", u_node->cuid, +- INSN_UID (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int p_st = SCHED_TIME (v_node); +- +- early_start = +- MAX (early_start, p_st + e->latency - (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "pred st = %d; early_start = %d; latency: %d", +- p_st, early_start, e->latency); ++ /* We first compute a forward range (start <= end), then decide whether ++ to reverse it. 
*/ ++ early_start = INT_MIN; ++ late_start = INT_MAX; ++ start = INT_MIN; ++ end = INT_MAX; ++ step = 1; ++ ++ count_preds = 0; ++ count_succs = 0; ++ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ { ++ fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)" ++ "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii); ++ fprintf (dump_file, "%11s %11s %11s %11s %5s\n", ++ "start", "early start", "late start", "end", "time"); ++ fprintf (dump_file, "=========== =========== =========== ===========" ++ " =====\n"); ++ } ++ /* Calculate early_start and limit end. Both bounds are inclusive. */ ++ if (psp_not_empty) ++ for (e = u_node->in; e != 0; e = e->next_in) ++ { ++ int v = e->src->cuid; - /* Copy primary reload reg to secondary reload reg. - (Note that these have been swapped above, then -@@ -7588,13 +7616,7 @@ - /* If REAL_OLD is a paradoxical SUBREG, remove it - and try to put the opposite SUBREG on - RELOADREG. */ -- if (GET_CODE (real_old) == SUBREG -- && (GET_MODE_SIZE (GET_MODE (real_old)) -- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (real_old)))) -- && 0 != (tem = gen_lowpart_common -- (GET_MODE (SUBREG_REG (real_old)), -- reloadreg))) -- real_old = SUBREG_REG (real_old), reloadreg = tem; -+ strip_paradoxical_subreg (&real_old, &reloadreg); +- if (e->data_type == MEM_DEP) +- end = MIN (end, SCHED_TIME (v_node) + ii - 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- } +- start = early_start; +- end = MIN (end, early_start + ii); +- /* Schedule the node close to it's predecessors. */ +- step = 1; ++ if (TEST_BIT (sched_nodes, v)) ++ { ++ int p_st = SCHED_TIME (v); ++ int earliest = p_st + e->latency - (e->distance * ii); ++ int latest = (e->data_type == MEM_DEP ? 
p_st + ii - 1 : INT_MAX); - gen_reload (reloadreg, second_reloadreg, - rl->opnum, rl->when_needed); -@@ -8410,16 +8432,8 @@ +- if (dump_file) +- fprintf (dump_file, +- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", +- u_node->cuid, INSN_UID (u_node->insn), start, end, step); +- } ++ if (dump_file) ++ { ++ fprintf (dump_file, "%11s %11d %11s %11d %5d", ++ "", earliest, "", latest, p_st); ++ print_ddg_edge (dump_file, e); ++ fprintf (dump_file, "\n"); ++ } - /* If IN is a paradoxical SUBREG, remove it and try to put the - opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ -- if (GET_CODE (in) == SUBREG -- && (GET_MODE_SIZE (GET_MODE (in)) -- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) -- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) -- in = SUBREG_REG (in), out = tem; -- else if (GET_CODE (out) == SUBREG -- && (GET_MODE_SIZE (GET_MODE (out)) -- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))) -- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (out)), in)) != 0) -- out = SUBREG_REG (out), in = tem; -+ if (!strip_paradoxical_subreg (&in, &out)) -+ strip_paradoxical_subreg (&out, &in); +- else if (!psp_not_empty && pss_not_empty) +- { +- int late_start = INT_MAX; ++ early_start = MAX (early_start, earliest); ++ end = MIN (end, latest); - /* How to do this reload can get quite tricky. Normally, we are being - asked to reload a simple operand, such as a MEM, a constant, or a pseudo ---- a/src/gcc/reorg.c -+++ b/src/gcc/reorg.c -@@ -3554,9 +3554,11 @@ - } - } +- end = INT_MIN; +- for (e = u_node->out; e != 0; e = e->next_out) +- { +- ddg_node_ptr v_node = e->dest; ++ if (e->type == TRUE_DEP && e->data_type == REG_DEP) ++ count_preds++; ++ } ++ } -+ /* See if we have a simple (conditional) jump that is useless. */ - if (! INSN_ANNULLED_BRANCH_P (delay_insn) -- && prev_active_insn (target_label) == insn - && ! condjump_in_parallel_p (delay_insn) -+ && prev_active_insn (target_label) == insn -+ && ! 
BARRIER_P (prev_nonnote_insn (target_label)) - #ifdef HAVE_cc0 - /* If the last insn in the delay slot sets CC0 for some insn, - various code assumes that it is in a delay slot. We could ---- a/src/gcc/simplify-rtx.c -+++ b/src/gcc/simplify-rtx.c -@@ -1000,6 +1000,48 @@ - && GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF) - return XEXP (op, 0); +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in pss_not_empty," +- " checking s %d (%d): ", u_node->cuid, +- INSN_UID (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } ++ /* Calculate late_start and limit start. Both bounds are inclusive. */ ++ if (pss_not_empty) ++ for (e = u_node->out; e != 0; e = e->next_out) ++ { ++ int v = e->dest->cuid; -+ /* Extending a widening multiplication should be canonicalized to -+ a wider widening multiplication. */ -+ if (GET_CODE (op) == MULT) -+ { -+ rtx lhs = XEXP (op, 0); -+ rtx rhs = XEXP (op, 1); -+ enum rtx_code lcode = GET_CODE (lhs); -+ enum rtx_code rcode = GET_CODE (rhs); -+ -+ /* Widening multiplies usually extend both operands, but sometimes -+ they use a shift to extract a portion of a register. */ -+ if ((lcode == SIGN_EXTEND -+ || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1)))) -+ && (rcode == SIGN_EXTEND -+ || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1))))) -+ { -+ enum machine_mode lmode = GET_MODE (lhs); -+ enum machine_mode rmode = GET_MODE (rhs); -+ int bits; +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int s_st = SCHED_TIME (v_node); ++ if (TEST_BIT (sched_nodes, v)) ++ { ++ int s_st = SCHED_TIME (v); ++ int earliest = (e->data_type == MEM_DEP ? 
s_st - ii + 1 : INT_MIN); ++ int latest = s_st - e->latency + (e->distance * ii); + +- late_start = MIN (late_start, +- s_st - e->latency + (e->distance * ii)); ++ if (dump_file) ++ { ++ fprintf (dump_file, "%11d %11s %11d %11s %5d", ++ earliest, "", latest, "", s_st); ++ print_ddg_edge (dump_file, e); ++ fprintf (dump_file, "\n"); ++ } + +- if (dump_file) +- fprintf (dump_file, +- "succ st = %d; late_start = %d; latency = %d", +- s_st, late_start, e->latency); +- +- if (e->data_type == MEM_DEP) +- end = MAX (end, SCHED_TIME (v_node) - ii + 1); +- if (dump_file) +- fprintf (dump_file, "end = %d\n", end); ++ start = MAX (start, earliest); ++ late_start = MIN (late_start, latest); + +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); ++ if (e->type == TRUE_DEP && e->data_type == REG_DEP) ++ count_succs++; ++ } ++ } + +- } +- start = late_start; +- end = MAX (end, late_start - ii); +- /* Schedule the node close to it's successors. */ ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ { ++ fprintf (dump_file, "----------- ----------- ----------- -----------" ++ " -----\n"); ++ fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n", ++ start, early_start, late_start, end, "", ++ "(max, max, min, min)"); ++ } + -+ if (lcode == ASHIFTRT) -+ /* Number of bits not shifted off the end. */ -+ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1)); -+ else /* lcode == SIGN_EXTEND */ -+ /* Size of inner mode. */ -+ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0))); ++ /* Get a target scheduling window no bigger than ii. 
*/ ++ if (early_start == INT_MIN && late_start == INT_MAX) ++ early_start = NODE_ASAP (u_node); ++ else if (early_start == INT_MIN) ++ early_start = late_start - (ii - 1); ++ late_start = MIN (late_start, early_start + (ii - 1)); + -+ if (rcode == ASHIFTRT) -+ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1)); -+ else /* rcode == SIGN_EXTEND */ -+ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0))); ++ /* Apply memory dependence limits. */ ++ start = MAX (start, early_start); ++ end = MIN (end, late_start); + -+ /* We can only widen multiplies if the result is mathematiclly -+ equivalent. I.e. if overflow was impossible. */ -+ if (bits <= GET_MODE_PRECISION (GET_MODE (op))) -+ return simplify_gen_binary -+ (MULT, mode, -+ simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode), -+ simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode)); -+ } -+ } ++ if (dump_file && (psp_not_empty || pss_not_empty)) ++ fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n", ++ "", start, end, "", ""); + - /* Check for a sign extension of a subreg of a promoted - variable, where the promotion is sign-extended, and the - target mode is the same as the variable's promotion. */ -@@ -1071,6 +1113,48 @@ - && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))) - return rtl_hooks.gen_lowpart_no_emit (mode, op); ++ /* If there are at least as many successors as predecessors, schedule the ++ node close to its successors. */ ++ if (pss_not_empty && count_succs >= count_preds) ++ { ++ int tmp = end; ++ end = start; ++ start = tmp; + step = -1; +- +- if (dump_file) +- fprintf (dump_file, +- "\nScheduling %d (%d) in a window (%d..%d) with step %d\n", +- u_node->cuid, INSN_UID (u_node->insn), start, end, step); +- + } -+ /* Extending a widening multiplication should be canonicalized to -+ a wider widening multiplication. 
*/ -+ if (GET_CODE (op) == MULT) -+ { -+ rtx lhs = XEXP (op, 0); -+ rtx rhs = XEXP (op, 1); -+ enum rtx_code lcode = GET_CODE (lhs); -+ enum rtx_code rcode = GET_CODE (rhs); -+ -+ /* Widening multiplies usually extend both operands, but sometimes -+ they use a shift to extract a portion of a register. */ -+ if ((lcode == ZERO_EXTEND -+ || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1)))) -+ && (rcode == ZERO_EXTEND -+ || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1))))) -+ { -+ enum machine_mode lmode = GET_MODE (lhs); -+ enum machine_mode rmode = GET_MODE (rhs); -+ int bits; -+ -+ if (lcode == LSHIFTRT) -+ /* Number of bits not shifted off the end. */ -+ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1)); -+ else /* lcode == ZERO_EXTEND */ -+ /* Size of inner mode. */ -+ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0))); -+ -+ if (rcode == LSHIFTRT) -+ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1)); -+ else /* rcode == ZERO_EXTEND */ -+ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0))); -+ -+ /* We can only widen multiplies if the result is mathematiclly -+ equivalent. I.e. if overflow was impossible. */ -+ if (bits <= GET_MODE_PRECISION (GET_MODE (op))) -+ return simplify_gen_binary -+ (MULT, mode, -+ simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode), -+ simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode)); -+ } -+ } -+ - /* (zero_extend:M (zero_extend:N )) is (zero_extend:M ). 
*/ - if (GET_CODE (op) == ZERO_EXTEND) - return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0), -@@ -2506,6 +2590,46 @@ - XEXP (op0, 1), mode), - op1); +- else if (psp_not_empty && pss_not_empty) +- { +- int early_start = INT_MIN; +- int late_start = INT_MAX; +- int count_preds = 0; +- int count_succs = 0; +- +- start = INT_MIN; +- end = INT_MAX; +- for (e = u_node->in; e != 0; e = e->next_in) +- { +- ddg_node_ptr v_node = e->src; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_pss_not_empty," +- " checking p %d (%d): ", u_node->cuid, INSN_UID +- (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int p_st = SCHED_TIME (v_node); +- +- early_start = MAX (early_start, +- p_st + e->latency +- - (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "pred st = %d; early_start = %d; latency = %d", +- p_st, early_start, e->latency); +- +- if (e->type == TRUE_DEP && e->data_type == REG_DEP) +- count_preds++; +- +- if (e->data_type == MEM_DEP) +- end = MIN (end, SCHED_TIME (v_node) + ii - 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- for (e = u_node->out; e != 0; e = e->next_out) +- { +- ddg_node_ptr v_node = e->dest; +- +- if (dump_file) +- { +- fprintf (dump_file, "\nProcessing edge:"); +- print_ddg_edge (dump_file, e); +- fprintf (dump_file, +- "\nScheduling %d (%d) in psp_pss_not_empty," +- " checking s %d (%d): ", u_node->cuid, INSN_UID +- (u_node->insn), v_node->cuid, INSN_UID +- (v_node->insn)); +- } +- +- if (TEST_BIT (sched_nodes, v_node->cuid)) +- { +- int s_st = SCHED_TIME (v_node); +- +- late_start = MIN (late_start, +- s_st - e->latency +- + (e->distance * ii)); +- +- if (dump_file) +- fprintf (dump_file, +- "succ st = %d; late_start = %d; latency = %d", +- s_st, late_start, e->latency); +- +- if (e->type == 
TRUE_DEP && e->data_type == REG_DEP) +- count_succs++; +- +- if (e->data_type == MEM_DEP) +- start = MAX (start, SCHED_TIME (v_node) - ii + 1); +- } +- else if (dump_file) +- fprintf (dump_file, "the node is not scheduled\n"); +- +- } +- start = MAX (start, early_start); +- end = MIN (end, MIN (early_start + ii, late_start + 1)); +- step = 1; +- /* If there are more successors than predecessors schedule the +- node close to it's successors. */ +- if (count_succs >= count_preds) +- { +- int old_start = start; +- +- start = end - 1; +- end = old_start - 1; +- step = -1; +- } +- } +- else /* psp is empty && pss is empty. */ +- { +- start = SCHED_ASAP (u_node); +- end = start + ii; +- step = 1; +- } ++ /* Now that we've finalized the window, make END an exclusive rather ++ than an inclusive bound. */ ++ end += step; -+ /* Given (xor (and A B) C), using P^Q == (~P&Q) | (~Q&P), -+ we can transform like this: -+ (A&B)^C == ~(A&B)&C | ~C&(A&B) -+ == (~A|~B)&C | ~C&(A&B) * DeMorgan's Law -+ == ~A&C | ~B&C | A&(~C&B) * Distribute and re-order -+ Attempt a few simplifications when B and C are both constants. */ -+ if (GET_CODE (op0) == AND -+ && CONST_INT_P (op1) -+ && CONST_INT_P (XEXP (op0, 1))) -+ { -+ rtx a = XEXP (op0, 0); -+ rtx b = XEXP (op0, 1); -+ rtx c = op1; -+ HOST_WIDE_INT bval = INTVAL (b); -+ HOST_WIDE_INT cval = INTVAL (c); -+ -+ rtx na_c -+ = simplify_binary_operation (AND, mode, -+ simplify_gen_unary (NOT, mode, a, mode), -+ c); -+ if ((~cval & bval) == 0) -+ { -+ /* Try to simplify ~A&C | ~B&C. */ -+ if (na_c != NULL_RTX) -+ return simplify_gen_binary (IOR, mode, na_c, -+ GEN_INT (~bval & cval)); -+ } -+ else -+ { -+ /* If ~A&C is zero, simplify A&(~C&B) | ~B&C. 
*/ -+ if (na_c == const0_rtx) -+ { -+ rtx a_nc_b = simplify_gen_binary (AND, mode, a, -+ GEN_INT (~cval & bval)); -+ return simplify_gen_binary (IOR, mode, a_nc_b, -+ GEN_INT (~bval & cval)); -+ } -+ } -+ } -+ - /* (xor (comparison foo bar) (const_int 1)) can become the reversed - comparison if STORE_FLAG_VALUE is 1. */ - if (STORE_FLAG_VALUE == 1 -@@ -5443,6 +5567,7 @@ - /* Optimize SUBREG truncations of zero and sign extended values. */ - if ((GET_CODE (op) == ZERO_EXTEND - || GET_CODE (op) == SIGN_EXTEND) -+ && SCALAR_INT_MODE_P (innermode) - && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)) - { - unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte); -@@ -5481,6 +5606,7 @@ - if ((GET_CODE (op) == LSHIFTRT - || GET_CODE (op) == ASHIFTRT) - && SCALAR_INT_MODE_P (outermode) -+ && SCALAR_INT_MODE_P (innermode) - /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE - to avoid the possibility that an outer LSHIFTRT shifts by more - than the sign extension's sign_bit_copies and introduces zeros -@@ -5500,6 +5626,7 @@ - if ((GET_CODE (op) == LSHIFTRT - || GET_CODE (op) == ASHIFTRT) - && SCALAR_INT_MODE_P (outermode) -+ && SCALAR_INT_MODE_P (innermode) - && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) - && CONST_INT_P (XEXP (op, 1)) - && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND -@@ -5514,6 +5641,7 @@ - the outer subreg is effectively a truncation to the original mode. */ - if (GET_CODE (op) == ASHIFT - && SCALAR_INT_MODE_P (outermode) -+ && SCALAR_INT_MODE_P (innermode) - && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) - && CONST_INT_P (XEXP (op, 1)) - && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND -@@ -5527,7 +5655,7 @@ - /* Recognize a word extraction from a multi-word subreg. 
*/ - if ((GET_CODE (op) == LSHIFTRT - || GET_CODE (op) == ASHIFTRT) -- && SCALAR_INT_MODE_P (outermode) -+ && SCALAR_INT_MODE_P (innermode) - && GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD - && GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode)) - && CONST_INT_P (XEXP (op, 1)) -@@ -5549,6 +5677,7 @@ + *start_p = start; + *step_p = step; +@@ -1587,10 +1997,10 @@ + if (dump_file) + fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n", + start, end, step); +- return -1; ++ return -1; + } - if ((GET_CODE (op) == LSHIFTRT - || GET_CODE (op) == ASHIFTRT) -+ && SCALAR_INT_MODE_P (innermode) - && MEM_P (XEXP (op, 0)) - && CONST_INT_P (XEXP (op, 1)) - && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op)) ---- a/src/gcc/stor-layout.c -+++ b/src/gcc/stor-layout.c -@@ -546,6 +546,34 @@ - return MIN (BIGGEST_ALIGNMENT, MAX (1, mode_base_align[mode]*BITS_PER_UNIT)); +- return 0; ++ return 0; } -+/* Return the natural mode of an array, given that it is SIZE bytes in -+ total and has elements of type ELEM_TYPE. */ -+ -+static enum machine_mode -+mode_for_array (tree elem_type, tree size) -+{ -+ tree elem_size; -+ unsigned HOST_WIDE_INT int_size, int_elem_size; -+ bool limit_p; -+ -+ /* One-element arrays get the component type's mode. */ -+ elem_size = TYPE_SIZE (elem_type); -+ if (simple_cst_equal (size, elem_size)) -+ return TYPE_MODE (elem_type); -+ -+ limit_p = true; -+ if (host_integerp (size, 1) && host_integerp (elem_size, 1)) -+ { -+ int_size = tree_low_cst (size, 1); -+ int_elem_size = tree_low_cst (elem_size, 1); -+ if (int_elem_size > 0 -+ && int_size % int_elem_size == 0 -+ && targetm.array_mode_supported_p (TYPE_MODE (elem_type), -+ int_size / int_elem_size)) -+ limit_p = false; -+ } -+ return mode_for_size_tree (size, MODE_INT, limit_p); -+} - - /* Subroutine of layout_decl: Force alignment required for the data type. - But if the decl itself wants greater alignment, don't override that. 
*/ -@@ -2039,14 +2067,8 @@ - && (TYPE_MODE (TREE_TYPE (type)) != BLKmode - || TYPE_NO_FORCE_BLK (TREE_TYPE (type)))) - { -- /* One-element arrays get the component type's mode. */ -- if (simple_cst_equal (TYPE_SIZE (type), -- TYPE_SIZE (TREE_TYPE (type)))) -- SET_TYPE_MODE (type, TYPE_MODE (TREE_TYPE (type))); -- else -- SET_TYPE_MODE (type, mode_for_size_tree (TYPE_SIZE (type), -- MODE_INT, 1)); -- -+ SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type), -+ TYPE_SIZE (type))); - if (TYPE_MODE (type) != BLKmode - && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT - && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type))) ---- a/src/gcc/target.def -+++ b/src/gcc/target.def -@@ -1344,6 +1344,13 @@ - unsigned, (unsigned nunroll, struct loop *loop), - NULL) + /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the +@@ -1646,7 +2056,7 @@ + SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ + for (e = u_node->in; e != 0; e = e->next_in) + if (TEST_BIT (sched_nodes, e->src->cuid) +- && ((SCHED_TIME (e->src) - (e->distance * ii)) == ++ && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == + first_cycle_in_window)) + { + if (dump_file) +@@ -1671,7 +2081,7 @@ + SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ + for (e = u_node->out; e != 0; e = e->next_out) + if (TEST_BIT (sched_nodes, e->dest->cuid) +- && ((SCHED_TIME (e->dest) + (e->distance * ii)) == ++ && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == + last_cycle_in_window)) + { + if (dump_file) +@@ -1695,7 +2105,7 @@ + last row of the scheduling window) */ -+/* True if X is a legitimate MODE-mode immediate operand. */ -+DEFHOOK -+(legitimate_constant_p, -+ "", -+ bool, (enum machine_mode mode, rtx x), -+ default_legitimate_constant_p) -+ - /* True if the constant X cannot be placed in the constant pool. 
*/ - DEFHOOK - (cannot_force_const_mem, -@@ -1611,6 +1618,38 @@ - bool, (enum machine_mode mode), - hook_bool_mode_false) + static bool +-try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, ++try_scheduling_node_in_cycle (partial_schedule_ptr ps, + int u, int cycle, sbitmap sched_nodes, + int *num_splits, sbitmap must_precede, + sbitmap must_follow) +@@ -1704,11 +2114,10 @@ + bool success = 0; -+/* True if we should try to use a scalar mode to represent an array, -+ overriding the usual MAX_FIXED_MODE limit. */ -+DEFHOOK -+(array_mode_supported_p, -+ "Return true if GCC should try to use a scalar mode to store an array\n\ -+of @var{nelems} elements, given that each element has mode @var{mode}.\n\ -+Returning true here overrides the usual @code{MAX_FIXED_MODE} limit\n\ -+and allows GCC to use any defined integer mode.\n\ -+\n\ -+One use of this hook is to support vector load and store operations\n\ -+that operate on several homogeneous vectors. For example, ARM NEON\n\ -+has operations like:\n\ -+\n\ -+@smallexample\n\ -+int8x8x3_t vld3_s8 (const int8_t *)\n\ -+@end smallexample\n\ -+\n\ -+where the return type is defined as:\n\ -+\n\ -+@smallexample\n\ -+typedef struct int8x8x3_t\n\ -+@{\n\ -+ int8x8_t val[3];\n\ -+@} int8x8x3_t;\n\ -+@end smallexample\n\ -+\n\ -+If this hook allows @code{val} to have a scalar mode, then\n\ -+@code{int8x8x3_t} can have the same mode. GCC can then store\n\ -+@code{int8x8x3_t}s in registers rather than forcing them onto the stack.", -+ bool, (enum machine_mode mode, unsigned HOST_WIDE_INT nelems), -+ hook_bool_mode_uhwi_false) -+ - /* Compute cost of moving data from a register of class FROM to one of - TO, using MODE. 
*/ - DEFHOOK ---- a/src/gcc/targhooks.c -+++ b/src/gcc/targhooks.c -@@ -1519,4 +1519,15 @@ - { OPT_LEVELS_NONE, 0, NULL, 0 } - }; + verify_partial_schedule (ps, sched_nodes); +- psi = ps_add_node_check_conflicts (ps, u_node, cycle, +- must_precede, must_follow); ++ psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); + if (psi) + { +- SCHED_TIME (u_node) = cycle; ++ SCHED_TIME (u) = cycle; + SET_BIT (sched_nodes, u); + success = 1; + *num_splits = 0; +@@ -1760,23 +2169,17 @@ + continue; + } -+bool -+default_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, -+ rtx x ATTRIBUTE_UNUSED) -+{ -+#ifdef LEGITIMATE_CONSTANT_P -+ return LEGITIMATE_CONSTANT_P (x); -+#else -+ return true; -+#endif -+} -+ - #include "gt-targhooks.h" ---- a/src/gcc/targhooks.h -+++ b/src/gcc/targhooks.h -@@ -183,3 +183,4 @@ +- if (JUMP_P (insn)) /* Closing branch handled later. */ +- { +- RESET_BIT (tobe_scheduled, u); +- continue; +- } +- + if (TEST_BIT (sched_nodes, u)) + continue; - extern void *default_get_pch_validity (size_t *); - extern const char *default_pch_valid_p (const void *, size_t); -+extern bool default_legitimate_constant_p (enum machine_mode, rtx); ---- a/src/gcc/testsuite/ChangeLog -+++ b/src/gcc/testsuite/ChangeLog -@@ -1,3 +1,69 @@ -+2011-11-25 Tobias Burnus -+ -+ PR fortran/50408 -+ * gfortran.dg/whole_file_35.f90: New. -+ -+2011-11-24 Tobias Burnus -+ -+ PR fortran/51218 -+ * resolve.c (pure_subroutine): If called subroutine is -+ impure, unset implicit_pure. -+ (resolve_function): Move impure check to simplify code. -+ -+2011-11-22 Paolo Carlini -+ -+ PR c++/51265 -+ * g++.dg/cpp0x/decltype36.C: New. -+ -+2011-11-19 Eric Botcazou -+ -+ * gcc.dg/delay-slot-2.c: New test. -+ -+2011-11-18 Joseph Myers -+ -+ * gcc.dg/cpp/assert4.c: Test __linux__, not __gnu_linux__. -+ -+2011-11-18 Paolo Carlini -+ -+ PR c++/51150 -+ * g++.dg/cpp0x/pr51150.C: New. 
-+ -+2011-11-16 Richard Earnshaw -+ Bernd Schmidt -+ Sebastian Huber -+ -+ PR target/49641 -+ * gcc.target/arm/pr49641.c: New test. -+ -+2011-11-10 Jakub Jelinek -+ -+ PR middle-end/51077 -+ * gcc.c-torture/compile/pr51077.c: New test. -+ -+2011-11-07 Jason Merrill -+ -+ PR c++/50870 -+ * g++.dg/cpp0x/decltype35.C: New. -+ -+2011-11-04 Eric Botcazou -+ -+ * g++.dg/other/offsetof7.C: New test. -+ -+2011-11-02 Bernd Schmidt -+ -+ * gcc.c-torture/compile/20110907.c: New file. -+ -+2011-10-29 Paolo Carlini -+ -+ PR c++/50901 -+ * g++.dg/cpp0x/pr50901.C: New. -+ -+2011-10-27 Uros Bizjak -+ Steven G. Kargl -+ -+ PR target/50875 -+ * gfortran.dg/pr50875.f90: New test. -+ - 2011-10-26 Release Manager + /* Try to get non-empty scheduling window. */ + success = 0; +- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, ++ if (get_sched_window (ps, u_node, sched_nodes, ii, &start, + &step, &end) == 0) + { + if (dump_file) +- fprintf (dump_file, "\nTrying to schedule node %d \ +- INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID ++ fprintf (dump_file, "\nTrying to schedule node %d " ++ "INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID + (g->nodes[u].insn)), start, end, step); - * GCC 4.6.2 released. 
---- a/src/gcc/testsuite/g++.dg/cpp0x/decltype35.C -+++ b/src/gcc/testsuite/g++.dg/cpp0x/decltype35.C -@@ -0,0 +1,15 @@ -+// PR c++/50870 -+// { dg-options -std=c++0x } -+ -+template -+ struct impl -+ { -+ template static T create(); -+ }; -+ -+template ::template create() -+ -> impl::template create())> -+struct tester { }; -+ -+tester*, int, float> ti; ---- a/src/gcc/testsuite/g++.dg/cpp0x/decltype36.C -+++ b/src/gcc/testsuite/g++.dg/cpp0x/decltype36.C -@@ -0,0 +1,21 @@ -+// PR c++/51265 -+// { dg-options -std=c++0x } -+ -+struct Funny -+{ -+ int print(int); -+}; -+ -+template -+void c(); -+ -+template -+void xx() -+{ -+ c(); -+} -+ -+int main() -+{ -+ xx(); -+} ---- a/src/gcc/testsuite/g++.dg/cpp0x/pr50901.C -+++ b/src/gcc/testsuite/g++.dg/cpp0x/pr50901.C -@@ -0,0 +1,9 @@ -+// { dg-options "-std=c++0x" } -+ -+template int foo(int a) -+{ -+ const unsigned b = a < 0 ? -a : a; -+ return 0; -+} -+ -+int i = foo(1); ---- a/src/gcc/testsuite/g++.dg/cpp0x/pr51150.C -+++ b/src/gcc/testsuite/g++.dg/cpp0x/pr51150.C -@@ -0,0 +1,20 @@ -+// PR c++/51150 -+// { dg-options "-std=c++0x" } -+ -+struct Clock { -+ double Now(); -+}; -+template void Foo(Clock* clock) { -+ const int now = clock->Now(); -+} -+ -+template void Foo(Clock*); -+ -+template void Boo(int val) { -+ const int now1 = (double)(val); -+ const int now2 = const_cast(val); // { dg-error "invalid" } -+ const int now3 = static_cast(val); -+ const int now4 = reinterpret_cast(val); // { dg-error "invalid" } -+} + gcc_assert ((step > 0 && start < end) +@@ -1788,26 +2191,13 @@ + + for (c = start; c != end; c += step) + { +- sbitmap tmp_precede = NULL; +- sbitmap tmp_follow = NULL; +- +- if (c == start) +- { +- if (step == 1) +- tmp_precede = must_precede; +- else /* step == -1. */ +- tmp_follow = must_follow; +- } +- if (c == end - step) +- { +- if (step == 1) +- tmp_follow = must_follow; +- else /* step == -1. 
*/ +- tmp_precede = must_precede; +- } ++ sbitmap tmp_precede, tmp_follow; + ++ set_must_precede_follow (&tmp_follow, must_follow, ++ &tmp_precede, must_precede, ++ c, start, end, step); + success = +- try_scheduling_node_in_cycle (ps, u_node, u, c, ++ try_scheduling_node_in_cycle (ps, u, c, + sched_nodes, + &num_splits, tmp_precede, + tmp_follow); +@@ -1883,6 +2273,7 @@ + int ii = ps->ii; + int new_ii = ii + 1; + int row; ++ int *rows_length_new; + + verify_partial_schedule (ps, sched_nodes); + +@@ -1893,18 +2284,20 @@ + if (dump_file) + fprintf (dump_file, "split_row=%d\n", split_row); + +- normalize_sched_times (ps); +- rotate_partial_schedule (ps, ps->min_cycle); ++ reset_sched_times (ps, PS_MIN_CYCLE (ps)); ++ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); + + rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); ++ rows_length_new = (int *) xcalloc (new_ii, sizeof (int)); + for (row = 0; row < split_row; row++) + { + rows_new[row] = ps->rows[row]; ++ rows_length_new[row] = ps->rows_length[row]; + ps->rows[row] = NULL; + for (crr_insn = rows_new[row]; + crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); + + SCHED_TIME (u) = new_time; +@@ -1920,11 +2313,12 @@ + for (row = split_row; row < ii; row++) + { + rows_new[row + 1] = ps->rows[row]; ++ rows_length_new[row + 1] = ps->rows_length[row]; + ps->rows[row] = NULL; + for (crr_insn = rows_new[row + 1]; + crr_insn; crr_insn = crr_insn->next_in_row) + { +- ddg_node_ptr u = crr_insn->node; ++ int u = crr_insn->id; + int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; + + SCHED_TIME (u) = new_time; +@@ -1941,6 +2335,8 @@ + + (SMODULO (ps->max_cycle, ii) >= split_row ? 
1 : 0); + free (ps->rows); + ps->rows = rows_new; ++ free (ps->rows_length); ++ ps->rows_length = rows_length_new; + ps->ii = new_ii; + gcc_assert (ps->min_cycle >= 0); + +@@ -1962,24 +2358,24 @@ + { + ddg_edge_ptr e; + int lower = INT_MIN, upper = INT_MAX; +- ddg_node_ptr crit_pred = NULL; +- ddg_node_ptr crit_succ = NULL; ++ int crit_pred = -1; ++ int crit_succ = -1; + int crit_cycle; + + for (e = u_node->in; e != 0; e = e->next_in) + { +- ddg_node_ptr v_node = e->src; ++ int v = e->src->cuid; + +- if (TEST_BIT (sched_nodes, v_node->cuid) +- && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii))) +- if (SCHED_TIME (v_node) > lower) ++ if (TEST_BIT (sched_nodes, v) ++ && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) ++ if (SCHED_TIME (v) > lower) + { +- crit_pred = v_node; +- lower = SCHED_TIME (v_node); ++ crit_pred = v; ++ lower = SCHED_TIME (v); + } + } + +- if (crit_pred != NULL) ++ if (crit_pred >= 0) + { + crit_cycle = SCHED_TIME (crit_pred) + 1; + return SMODULO (crit_cycle, ii); +@@ -1987,17 +2383,18 @@ + + for (e = u_node->out; e != 0; e = e->next_out) + { +- ddg_node_ptr v_node = e->dest; +- if (TEST_BIT (sched_nodes, v_node->cuid) +- && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii))) +- if (SCHED_TIME (v_node) < upper) ++ int v = e->dest->cuid; + -+template void Boo(int); ---- a/src/gcc/testsuite/g++.dg/other/offsetof7.C -+++ b/src/gcc/testsuite/g++.dg/other/offsetof7.C -@@ -0,0 +1,17 @@ -+// PR c++/50608 -+// Testcase by -+// { dg-do compile } ++ if (TEST_BIT (sched_nodes, v) ++ && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) ++ if (SCHED_TIME (v) < upper) + { +- crit_succ = v_node; +- upper = SCHED_TIME (v_node); ++ crit_succ = v; ++ upper = SCHED_TIME (v); + } + } + +- if (crit_succ != NULL) ++ if (crit_succ >= 0) + { + crit_cycle = SCHED_TIME (crit_succ); + return SMODULO (crit_cycle, ii); +@@ -2016,16 +2413,23 @@ + ps_insn_ptr crr_insn; + + for (row = 0; row < ps->ii; row++) +- for (crr_insn = 
ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) +- { +- ddg_node_ptr u = crr_insn->node; +- +- gcc_assert (TEST_BIT (sched_nodes, u->cuid)); +- /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by +- popcount (sched_nodes) == number of insns in ps. */ +- gcc_assert (SCHED_TIME (u) >= ps->min_cycle); +- gcc_assert (SCHED_TIME (u) <= ps->max_cycle); +- } ++ { ++ int length = 0; ++ ++ for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) ++ { ++ int u = crr_insn->id; ++ ++ length++; ++ gcc_assert (TEST_BIT (sched_nodes, u)); ++ /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by ++ popcount (sched_nodes) == number of insns in ps. */ ++ gcc_assert (SCHED_TIME (u) >= ps->min_cycle); ++ gcc_assert (SCHED_TIME (u) <= ps->max_cycle); ++ } ++ ++ gcc_assert (ps->rows_length[row] == length); ++ } + } + + +@@ -2431,6 +2835,8 @@ + { + partial_schedule_ptr ps = XNEW (struct partial_schedule); + ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); ++ ps->rows_length = (int *) xcalloc (ii, sizeof (int)); ++ ps->reg_moves = NULL; + ps->ii = ii; + ps->history = history; + ps->min_cycle = INT_MAX; +@@ -2465,10 +2871,19 @@ + static void + free_partial_schedule (partial_schedule_ptr ps) + { ++ ps_reg_move_info *move; ++ unsigned int i; + -+struct A { -+ int offset; -+}; + if (!ps) + return; + -+struct B: public A { -+}; ++ FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move) ++ sbitmap_free (move->uses); ++ VEC_free (ps_reg_move_info, heap, ps->reg_moves); + -+struct C { -+ A a; -+ B b; -+}; + free_ps_insns (ps); + free (ps->rows); ++ free (ps->rows_length); + free (ps); + } + +@@ -2486,6 +2901,8 @@ + ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii + * sizeof (ps_insn_ptr)); + memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); ++ ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int)); ++ memset (ps->rows_length, 0, new_ii * sizeof (int)); + ps->ii = new_ii; + ps->min_cycle = 
INT_MAX; + ps->max_cycle = INT_MIN; +@@ -2505,8 +2922,13 @@ + fprintf (dump, "\n[ROW %d ]: ", i); + while (ps_i) + { +- fprintf (dump, "%d, ", +- INSN_UID (ps_i->node->insn)); ++ rtx insn = ps_rtl_insn (ps, ps_i->id); + -+int fails = __builtin_offsetof (C, b.offset); ---- a/src/gcc/testsuite/gcc.c-torture/compile/20110401-1.c -+++ b/src/gcc/testsuite/gcc.c-torture/compile/20110401-1.c -@@ -0,0 +1,22 @@ -+void asn1_length_der (unsigned long int len, unsigned char *ans, int *ans_len) -+{ -+ int k; -+ unsigned char temp[4]; -+ if (len < 128) { -+ if (ans != ((void *) 0)) -+ ans[0] = (unsigned char) len; -+ *ans_len = 1; -+ } else { -+ k = 0; -+ while (len) { -+ temp[k++] = len & 0xFF; -+ len = len >> 8; -+ } -+ *ans_len = k + 1; -+ if (ans != ((void *) 0)) { -+ ans[0] = ((unsigned char) k & 0x7F) + 128; -+ while (k--) -+ ans[*ans_len - 1 - k] = temp[k]; ++ if (JUMP_P (insn)) ++ fprintf (dump, "%d (branch), ", INSN_UID (insn)); ++ else ++ fprintf (dump, "%d, ", INSN_UID (insn)); ++ + ps_i = ps_i->next_in_row; + } + } +@@ -2514,36 +2936,31 @@ + + /* Creates an object of PS_INSN and initializes it to the given parameters. */ + static ps_insn_ptr +-create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) ++create_ps_insn (int id, int cycle) + { + ps_insn_ptr ps_i = XNEW (struct ps_insn); + +- ps_i->node = node; ++ ps_i->id = id; + ps_i->next_in_row = NULL; + ps_i->prev_in_row = NULL; +- ps_i->row_rest_count = rest_count; + ps_i->cycle = cycle; + + return ps_i; + } + + +-/* Removes the given PS_INSN from the partial schedule. Returns false if the +- node is not found in the partial schedule, else returns true. */ +-static bool ++/* Removes the given PS_INSN from the partial schedule. */ ++static void + remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) + { + int row; + +- if (!ps || !ps_i) +- return false; +- ++ gcc_assert (ps && ps_i); ++ + row = SMODULO (ps_i->cycle, ps->ii); + if (! 
ps_i->prev_in_row) + { +- if (ps_i != ps->rows[row]) +- return false; +- ++ gcc_assert (ps_i == ps->rows[row]); + ps->rows[row] = ps_i->next_in_row; + if (ps->rows[row]) + ps->rows[row]->prev_in_row = NULL; +@@ -2554,8 +2971,10 @@ + if (ps_i->next_in_row) + ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; + } ++ ++ ps->rows_length[row] -= 1; + free (ps_i); +- return true; ++ return; + } + + /* Unlike what literature describes for modulo scheduling (which focuses +@@ -2571,6 +2990,7 @@ + ps_insn_ptr next_ps_i; + ps_insn_ptr first_must_follow = NULL; + ps_insn_ptr last_must_precede = NULL; ++ ps_insn_ptr last_in_row = NULL; + int row; + + if (! ps_i) +@@ -2585,10 +3005,11 @@ + next_ps_i; + next_ps_i = next_ps_i->next_in_row) + { +- if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid) ++ if (must_follow ++ && TEST_BIT (must_follow, next_ps_i->id) + && ! first_must_follow) + first_must_follow = next_ps_i; +- if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid)) ++ if (must_precede && TEST_BIT (must_precede, next_ps_i->id)) + { + /* If we have already met a node that must follow, then + there is no possible column. */ +@@ -2597,8 +3018,37 @@ + else + last_must_precede = next_ps_i; + } ++ /* The closing branch must be the last in the row. */ ++ if (must_precede ++ && TEST_BIT (must_precede, next_ps_i->id) ++ && JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) ++ return false; ++ ++ last_in_row = next_ps_i; + } + ++ /* The closing branch is scheduled as well. Make sure there is no ++ dependent instruction after it as the branch should be the last ++ instruction in the row. */ ++ if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) ++ { ++ if (first_must_follow) ++ return false; ++ if (last_in_row) ++ { ++ /* Make the branch the last in the row. New instructions ++ will be inserted at the beginning of the row or after the ++ last must_precede instruction thus the branch is guaranteed ++ to remain the last instruction in the row. 
*/ ++ last_in_row->next_in_row = ps_i; ++ ps_i->prev_in_row = last_in_row; ++ ps_i->next_in_row = NULL; + } -+ } -+} ---- a/src/gcc/testsuite/gcc.c-torture/compile/20110913-1.c -+++ b/src/gcc/testsuite/gcc.c-torture/compile/20110913-1.c -@@ -0,0 +1,26 @@ -+struct ieee754_double { -+ double d; -+}; -+extern const float __exp_deltatable[178]; -+float __ieee754_expf (float x) -+{ -+ static const float himark = 88.72283935546875; -+ static const float lomark = -103.972084045410; -+ if (__builtin_isless(x, himark) && __builtin_isgreater(x, lomark)) -+ { -+ int tval; -+ double x22, t, result, dx; -+ float delta; -+ struct ieee754_double ex2_u; -+ dx -= t; -+ tval = (int) (t * 512.0); -+ if (t >= 0) -+ delta = - __exp_deltatable[tval]; + else -+ delta = __exp_deltatable[-tval]; -+ x22 = (0.5000000496709180453 * dx + 1.0000001192102037084) * dx + delta; -+ result = x22 * ex2_u.d + ex2_u.d; -+ return (float) result; ++ ps->rows[row] = ps_i; ++ return true; + } -+ return x; -+} ---- a/src/gcc/testsuite/gcc.c-torture/compile/pr51077.c -+++ b/src/gcc/testsuite/gcc.c-torture/compile/pr51077.c -@@ -0,0 +1,15 @@ -+/* PR middle-end/51077 */ -+ -+struct S { unsigned char s, t[256]; }; -+ -+void -+foo (const struct S *x, struct S *y, int z) ++ + /* Now insert the node after INSERT_AFTER_PSI. */ + + if (! last_must_precede) +@@ -2631,7 +3081,6 @@ + { + ps_insn_ptr prev, next; + int row; +- ddg_node_ptr next_node; + + if (!ps || !ps_i) + return false; +@@ -2641,11 +3090,9 @@ + if (! ps_i->next_in_row) + return false; + +- next_node = ps_i->next_in_row->node; +- + /* Check if next_in_row is dependent on ps_i, both having same sched + times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ +- if (must_follow && TEST_BIT (must_follow, next_node->cuid)) ++ if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id)) + return false; + + /* Advance PS_I over its next_in_row in the doubly linked list. 
*/ +@@ -2676,21 +3123,16 @@ + before/after (respectively) the node pointed to by PS_I when scheduled + in the same cycle. */ + static ps_insn_ptr +-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, ++add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, + sbitmap must_precede, sbitmap must_follow) + { + ps_insn_ptr ps_i; +- int rest_count = 1; + int row = SMODULO (cycle, ps->ii); + +- if (ps->rows[row] +- && ps->rows[row]->row_rest_count >= issue_rate) ++ if (ps->rows_length[row] >= issue_rate) + return NULL; + +- if (ps->rows[row]) +- rest_count += ps->rows[row]->row_rest_count; +- +- ps_i = create_ps_insn (node, rest_count, cycle); ++ ps_i = create_ps_insn (id, cycle); + + /* Finds and inserts PS_I according to MUST_FOLLOW and + MUST_PRECEDE. */ +@@ -2700,6 +3142,7 @@ + return NULL; + } + ++ ps->rows_length[row] += 1; + return ps_i; + } + +@@ -2741,7 +3184,7 @@ + crr_insn; + crr_insn = crr_insn->next_in_row) + { +- rtx insn = crr_insn->node->insn; ++ rtx insn = ps_rtl_insn (ps, crr_insn->id); + + if (!NONDEBUG_INSN_P (insn)) + continue; +@@ -2778,7 +3221,7 @@ + cuid N must be come before/after (respectively) the node pointed to by + PS_I when scheduled in the same cycle. */ + ps_insn_ptr +-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, ++ps_add_node_check_conflicts (partial_schedule_ptr ps, int n, + int c, sbitmap must_precede, + sbitmap must_follow) + { +@@ -2820,6 +3263,22 @@ + return ps_i; + } + ++/* Calculate the stage count of the partial schedule PS. The calculation ++ takes into account the rotation amount passed in ROTATION_AMOUNT. 
*/ ++int ++calculate_stage_count (partial_schedule_ptr ps, int rotation_amount) +{ -+ int i; -+ for (i = 0; i < 8; i++) -+ { -+ const struct S *a = &x[i]; -+ __builtin___memcpy_chk (y->t, a->t, z, __builtin_object_size (y->t, 0)); -+ y = (struct S *) &y->t[z]; -+ } -+} ---- a/src/gcc/testsuite/gcc.dg/cpp/assert4.c -+++ b/src/gcc/testsuite/gcc.dg/cpp/assert4.c -@@ -1,4 +1,4 @@ --/* Copyright (C) 2003, 2006, 2008 Free Software Foundation, Inc. -+/* Copyright (C) 2003, 2006, 2008, 2009, 2011 Free Software Foundation, Inc. - Test builtin preprocessor assertions. - By Kaveh Ghazi . */ - -@@ -7,7 +7,7 @@ - - /* Check for #system assertions. */ - --#if defined __gnu_linux__ -+#if defined __linux__ - # if !#system(linux) || !#system(unix) || !#system(posix) - # error - # endif ---- a/src/gcc/testsuite/gcc.dg/delay-slot-2.c -+++ b/src/gcc/testsuite/gcc.dg/delay-slot-2.c -@@ -0,0 +1,116 @@ -+/* PR rtl-optimization/51187 */ -+/* Reported by Jurij Smakov */ ++ int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; ++ int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; ++ int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); + -+/* { dg-do compile } */ -+/* { dg-options "-g -O2" } */ ++ /* The calculation of stage count is done adding the number of stages ++ before cycle zero and after cycle zero. */ ++ stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii); + -+extern int printf (__const char *__restrict __format, ...); -+extern void print_c_condition (const char *); ++ return stage_count; ++} + -+enum decision_type -+{ -+ DT_num_insns, -+ DT_mode, DT_code, DT_veclen, -+ DT_elt_zero_int, DT_elt_one_int, DT_elt_zero_wide, DT_elt_zero_wide_safe, -+ DT_const_int, -+ DT_veclen_ge, DT_dup, DT_pred, DT_c_test, -+ DT_accept_op, DT_accept_insn -+}; + /* Rotate the rows of PS such that insns scheduled at time + START_CYCLE will appear in row 0. Updates max/min_cycles. 
*/ + void +@@ -2837,11 +3296,16 @@ + for (i = 0; i < backward_rotates; i++) + { + ps_insn_ptr first_row = ps->rows[0]; ++ int first_row_length = ps->rows_length[0]; + + for (row = 0; row < last_row; row++) +- ps->rows[row] = ps->rows[row+1]; ++ { ++ ps->rows[row] = ps->rows[row + 1]; ++ ps->rows_length[row] = ps->rows_length[row + 1]; ++ } + + ps->rows[last_row] = first_row; ++ ps->rows_length[last_row] = first_row_length; + } + + ps->max_cycle -= start_cycle; +--- a/src/gcc/optabs.c ++++ b/src/gcc/optabs.c +@@ -225,6 +225,61 @@ + return 1; + } + ++/* Given two input operands, OP0 and OP1, determine what the correct from_mode ++ for a widening operation would be. In most cases this would be OP0, but if ++ that's a constant it'll be VOIDmode, which isn't useful. */ + -+struct decision_test ++static enum machine_mode ++widened_mode (enum machine_mode to_mode, rtx op0, rtx op1) +{ -+ struct decision_test *next; -+ enum decision_type type; -+ -+ union -+ { -+ int num_insns; ++ enum machine_mode m0 = GET_MODE (op0); ++ enum machine_mode m1 = GET_MODE (op1); ++ enum machine_mode result; + -+ struct -+ { -+ const char *name; -+ } pred; ++ if (m0 == VOIDmode && m1 == VOIDmode) ++ return to_mode; ++ else if (m0 == VOIDmode || GET_MODE_SIZE (m0) < GET_MODE_SIZE (m1)) ++ result = m1; ++ else ++ result = m0; + -+ const char *c_test; -+ int veclen; -+ int dup; -+ long intval; -+ int opno; -+ -+ struct { -+ int code_number; -+ int lineno; -+ int num_clobbers_to_add; -+ } insn; -+ } u; -+}; ++ if (GET_MODE_SIZE (result) > GET_MODE_SIZE (to_mode)) ++ return to_mode; + -+enum routine_type { -+ RECOG, SPLIT, PEEPHOLE2 -+}; ++ return result; ++} ++ ++/* Find a widening optab even if it doesn't widen as much as we want. ++ E.g. if from_mode is HImode, and to_mode is DImode, and there is no ++ direct HI->SI insn, then return SI->DI, if that exists. ++ If PERMIT_NON_WIDENING is non-zero then this can be used with ++ non-widening optabs also. 
*/ + -+void -+write_cond (struct decision_test *p, int depth, -+ enum routine_type subroutine_type) ++enum insn_code ++find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode, ++ int permit_non_widening, ++ enum machine_mode *found_mode) +{ -+ switch (p->type) ++ for (; (permit_non_widening || from_mode != to_mode) ++ && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode) ++ && from_mode != VOIDmode; ++ from_mode = GET_MODE_WIDER_MODE (from_mode)) + { -+ case DT_num_insns: -+ printf ("peep2_current_count >= %d", p->u.num_insns); -+ break; -+ -+ case DT_code: -+ printf ("GET_CODE (x%d) == ", depth); -+ break; -+ -+ case DT_veclen: -+ printf ("XVECLEN (x%d, 0) == %d", depth, p->u.veclen); -+ break; -+ -+ case DT_elt_zero_int: -+ printf ("XINT (x%d, 0) == %d", depth, (int) p->u.intval); -+ break; -+ -+ case DT_elt_one_int: -+ printf ("XINT (x%d, 1) == %d", depth, (int) p->u.intval); -+ break; -+ -+ case DT_elt_zero_wide: -+ case DT_elt_zero_wide_safe: -+ printf ("XWINT (x%d, 0) == ", depth); -+ print_host_wide_int (p->u.intval); -+ break; -+ -+ case DT_const_int: -+ printf ("x%d == const_int_rtx[MAX_SAVED_CONST_INT + (%d)]", -+ depth, (int) p->u.intval); -+ break; -+ -+ case DT_veclen_ge: -+ printf ("XVECLEN (x%d, 0) >= %d", depth, p->u.veclen); -+ break; -+ -+ case DT_dup: -+ printf ("rtx_equal_p (x%d, operands[%d])", depth, p->u.dup); -+ break; -+ -+ case DT_pred: -+ printf ("%s (x%d)", p->u.pred.name, depth); -+ break; -+ -+ case DT_c_test: -+ print_c_condition (p->u.c_test); -+ break; -+ -+ case DT_accept_insn: -+ ((void)(__builtin_expect(!(subroutine_type == RECOG), 0) ? __builtin_unreachable(), 0 : 0)); -+ ((void)(__builtin_expect(!(p->u.insn.num_clobbers_to_add), 0) ? 
__builtin_unreachable(), 0 : 0)); -+ printf ("pnum_clobbers != NULL"); -+ break; ++ enum insn_code handler = widening_optab_handler (op, to_mode, ++ from_mode); + -+ default: -+ __builtin_unreachable(); ++ if (handler != CODE_FOR_nothing) ++ { ++ if (found_mode) ++ *found_mode = from_mode; ++ return handler; ++ } + } -+} + -+/* { dg-final { scan-assembler "printf" } } */ ---- a/src/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c -+++ b/src/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c -@@ -0,0 +1,164 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target sync_longlong } */ -+/* { dg-options "-std=gnu99" } */ -+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ -+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++ return CODE_FOR_nothing; ++} ++ + /* Widen OP to MODE and return the rtx for the widened operand. UNSIGNEDP + says whether OP is signed or unsigned. NO_EXTEND is nonzero if we need + not actually do a sign-extend or zero-extend, but can leave the +@@ -399,6 +454,14 @@ + return TYPE_UNSIGNED (type) ? + vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ return TYPE_UNSIGNED (type) ? ++ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab; + ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ return TYPE_UNSIGNED (type) ? ++ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab; + -+/* Test basic functionality of the intrinsics. The operations should -+ not be optimized away if no one checks the return values. */ + case VEC_UNPACK_HI_EXPR: + return TYPE_UNSIGNED (type) ? 
+ vec_unpacku_hi_optab : vec_unpacks_hi_optab; +@@ -517,8 +580,9 @@ + optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); + if (ops->code == WIDEN_MULT_PLUS_EXPR + || ops->code == WIDEN_MULT_MINUS_EXPR) +- icode = (int) optab_handler (widen_pattern_optab, +- TYPE_MODE (TREE_TYPE (ops->op2))); ++ icode = (int) find_widening_optab_handler (widen_pattern_optab, ++ TYPE_MODE (TREE_TYPE (ops->op2)), ++ tmode0, 0); + else + icode = (int) optab_handler (widen_pattern_optab, tmode0); + gcc_assert (icode != CODE_FOR_nothing); +@@ -1389,7 +1453,9 @@ + rtx target, int unsignedp, enum optab_methods methods, + rtx last) + { +- int icode = (int) optab_handler (binoptab, mode); ++ enum machine_mode from_mode = widened_mode (mode, op0, op1); ++ int icode = (int) find_widening_optab_handler (binoptab, mode, ++ from_mode, 1); + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + enum machine_mode tmp_mode; +@@ -1546,7 +1612,9 @@ + /* If we can do it with a three-operand insn, do so. */ + + if (methods != OPTAB_MUST_WIDEN +- && optab_handler (binoptab, mode) != CODE_FOR_nothing) ++ && find_widening_optab_handler (binoptab, mode, ++ widened_mode (mode, op0, op1), 1) ++ != CODE_FOR_nothing) + { + temp = expand_binop_directly (mode, binoptab, op0, op1, target, + unsignedp, methods, last); +@@ -1586,8 +1654,9 @@ + + if (binoptab == smul_optab + && GET_MODE_WIDER_MODE (mode) != VOIDmode +- && (optab_handler ((unsignedp ? umul_widen_optab : smul_widen_optab), +- GET_MODE_WIDER_MODE (mode)) ++ && (widening_optab_handler ((unsignedp ? umul_widen_optab ++ : smul_widen_optab), ++ GET_MODE_WIDER_MODE (mode), mode) + != CODE_FOR_nothing)) + { + temp = expand_binop (GET_MODE_WIDER_MODE (mode), +@@ -1618,9 +1687,11 @@ + if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing + || (binoptab == smul_optab + && GET_MODE_WIDER_MODE (wider_mode) != VOIDmode +- && (optab_handler ((unsignedp ? 
umul_widen_optab +- : smul_widen_optab), +- GET_MODE_WIDER_MODE (wider_mode)) ++ && (find_widening_optab_handler ((unsignedp ++ ? umul_widen_optab ++ : smul_widen_optab), ++ GET_MODE_WIDER_MODE (wider_mode), ++ mode, 0) + != CODE_FOR_nothing))) + { + rtx xop0 = op0, xop1 = op1; +@@ -2043,8 +2114,8 @@ + && optab_handler (add_optab, word_mode) != CODE_FOR_nothing) + { + rtx product = NULL_RTX; +- +- if (optab_handler (umul_widen_optab, mode) != CODE_FOR_nothing) ++ if (widening_optab_handler (umul_widen_optab, mode, word_mode) ++ != CODE_FOR_nothing) + { + product = expand_doubleword_mult (mode, op0, op1, target, + true, methods); +@@ -2053,7 +2124,8 @@ + } + + if (product == NULL_RTX +- && optab_handler (smul_widen_optab, mode) != CODE_FOR_nothing) ++ && widening_optab_handler (smul_widen_optab, mode, word_mode) ++ != CODE_FOR_nothing) + { + product = expand_doubleword_mult (mode, op0, op1, target, + false, methods); +@@ -2144,7 +2216,8 @@ + wider_mode != VOIDmode; + wider_mode = GET_MODE_WIDER_MODE (wider_mode)) + { +- if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing ++ if (find_widening_optab_handler (binoptab, wider_mode, mode, 1) ++ != CODE_FOR_nothing + || (methods == OPTAB_LIB + && optab_libfunc (binoptab, wider_mode))) + { +@@ -6171,6 +6244,9 @@ + init_optab (usashl_optab, US_ASHIFT); + init_optab (ashr_optab, ASHIFTRT); + init_optab (lshr_optab, LSHIFTRT); ++ init_optabv (vashl_optab, ASHIFT); ++ init_optabv (vashr_optab, ASHIFTRT); ++ init_optabv (vlshr_optab, LSHIFTRT); + init_optab (rotl_optab, ROTATE); + init_optab (rotr_optab, ROTATERT); + init_optab (smin_optab, SMIN); +@@ -6283,6 +6359,10 @@ + init_optab (vec_widen_umult_lo_optab, UNKNOWN); + init_optab (vec_widen_smult_hi_optab, UNKNOWN); + init_optab (vec_widen_smult_lo_optab, UNKNOWN); ++ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN); ++ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN); ++ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN); ++ init_optab (vec_widen_sshiftl_lo_optab, 
UNKNOWN); + init_optab (vec_unpacks_hi_optab, UNKNOWN); + init_optab (vec_unpacks_lo_optab, UNKNOWN); + init_optab (vec_unpacku_hi_optab, UNKNOWN); +--- a/src/gcc/optabs.h ++++ b/src/gcc/optabs.h +@@ -42,6 +42,11 @@ + int insn_code; + }; + ++struct widening_optab_handlers ++{ ++ struct optab_handlers handlers[NUM_MACHINE_MODES][NUM_MACHINE_MODES]; ++}; + -+/* Based on ia64-sync-[12].c, but 1) long on ARM is 32 bit so use long long -+ (an explicit 64bit type maybe a better bet) and 2) Use values that cross -+ the 32bit boundary and cause carries since the actual maths are done as -+ pairs of 32 bit instructions. */ + struct optab_d + { + enum rtx_code code; +@@ -50,6 +55,7 @@ + void (*libcall_gen)(struct optab_d *, const char *name, char suffix, + enum machine_mode); + struct optab_handlers handlers[NUM_MACHINE_MODES]; ++ struct widening_optab_handlers *widening; + }; + typedef struct optab_d * optab; + +@@ -344,6 +350,12 @@ + OTI_vec_widen_umult_lo, + OTI_vec_widen_smult_hi, + OTI_vec_widen_smult_lo, ++ /* Widening shift left. ++ The high/low part of the resulting vector is returned. */ ++ OTI_vec_widen_ushiftl_hi, ++ OTI_vec_widen_ushiftl_lo, ++ OTI_vec_widen_sshiftl_hi, ++ OTI_vec_widen_sshiftl_lo, + /* Extract and widen the high/low part of a vector of signed or + floating point elements. 
*/ + OTI_vec_unpacks_hi, +@@ -536,6 +548,10 @@ + #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo]) + #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi]) + #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo]) ++#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi]) ++#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo]) ++#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi]) ++#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo]) + #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi]) + #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo]) + #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi]) +@@ -578,6 +594,9 @@ + COI_satfract, + COI_satfractuns, + ++ COI_vec_load_lanes, ++ COI_vec_store_lanes, + -+/* Note: This file is #included by some of the ARM tests. */ + COI_MAX + }; + +@@ -598,6 +617,8 @@ + #define fractuns_optab (&convert_optab_table[COI_fractuns]) + #define satfract_optab (&convert_optab_table[COI_satfract]) + #define satfractuns_optab (&convert_optab_table[COI_satfractuns]) ++#define vec_load_lanes_optab (&convert_optab_table[COI_vec_load_lanes]) ++#define vec_store_lanes_optab (&convert_optab_table[COI_vec_store_lanes]) + + /* Contains the optab used for each rtx code. */ + extern optab code_to_optab[NUM_RTX_CODE + 1]; +@@ -794,6 +815,15 @@ + extern void emit_unop_insn (int, rtx, rtx, enum rtx_code); + extern bool maybe_emit_unop_insn (int, rtx, rtx, enum rtx_code); + ++/* Find a widening optab even if it doesn't widen as much as we want. 
*/ ++#define find_widening_optab_handler(A,B,C,D) \ ++ find_widening_optab_handler_and_mode (A, B, C, D, NULL) ++extern enum insn_code find_widening_optab_handler_and_mode (optab, ++ enum machine_mode, ++ enum machine_mode, ++ int, ++ enum machine_mode *); + -+__extension__ typedef __SIZE_TYPE__ size_t; + /* An extra flag to control optab_for_tree_code's behavior. This is needed to + distinguish between machines with a vector shift that takes a scalar for the + shift amount vs. machines that take a vector for the shift amount. */ +@@ -869,6 +899,23 @@ + + (int) CODE_FOR_nothing); + } + ++/* Like optab_handler, but for widening_operations that have a TO_MODE and ++ a FROM_MODE. */ + -+extern void abort (void); -+extern void *memcpy (void *, const void *, size_t); -+extern int memcmp (const void *, const void *, size_t); ++static inline enum insn_code ++widening_optab_handler (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode) ++{ ++ if (to_mode == from_mode || from_mode == VOIDmode) ++ return optab_handler (op, to_mode); + -+/* Temporary space where the work actually gets done. */ -+static long long AL[24]; -+/* Values copied into AL before we start. */ -+static long long init_di[24] = { 0x100000002ll, 0x200000003ll, 0, 1, ++ if (op->widening) ++ return (enum insn_code) (op->widening->handlers[(int) to_mode][(int) from_mode].insn_code ++ + (int) CODE_FOR_nothing); + -+ 0x100000002ll, 0x100000002ll, -+ 0x100000002ll, 0x100000002ll, ++ return CODE_FOR_nothing; ++} + -+ 0, 0x1000e0de0000ll, -+ 42 , 0xc001c0de0000ll, + /* Record that insn CODE should be used to implement mode MODE of OP. */ + + static inline void +@@ -877,6 +924,26 @@ + op->handlers[(int) mode].insn_code = (int) code - (int) CODE_FOR_nothing; + } + ++/* Like set_optab_handler, but for widening operations that have a TO_MODE ++ and a FROM_MODE. 
*/ + -+ -1ll, 0, 0xff00ff0000ll, -1ll, ++static inline void ++set_widening_optab_handler (optab op, enum machine_mode to_mode, ++ enum machine_mode from_mode, enum insn_code code) ++{ ++ if (to_mode == from_mode) ++ set_optab_handler (op, to_mode, code); ++ else ++ { ++ if (op->widening == NULL) ++ op->widening = (struct widening_optab_handlers *) ++ xcalloc (1, sizeof (struct widening_optab_handlers)); + -+ 0, 0x1000e0de0000ll, -+ 42 , 0xc001c0de0000ll, ++ op->widening->handlers[(int) to_mode][(int) from_mode].insn_code ++ = (int) code - (int) CODE_FOR_nothing; ++ } ++} + -+ -1ll, 0, 0xff00ff0000ll, -1ll}; -+/* This is what should be in AL at the end. */ -+static long long test_di[24] = { 0x1234567890ll, 0x1234567890ll, 1, 0, + /* Return the insn used to perform conversion OP from mode FROM_MODE + to mode TO_MODE; return CODE_FOR_nothing if the target does not have + such an insn. */ +--- a/src/gcc/opts.c ++++ b/src/gcc/opts.c +@@ -823,6 +823,12 @@ + opts->x_flag_split_stack = 0; + } + } + -+ 0x100000002ll, 0x100000002ll, -+ 0x100000002ll, 0x100000002ll, ++ /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion ++ is disabled. */ ++ if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert) ++ maybe_set_param_value (PARAM_MAX_STORES_TO_SINK, 0, ++ opts->x_param_values, opts_set->x_param_values); + } + + #define LEFT_COLUMN 27 +--- a/src/gcc/params.def ++++ b/src/gcc/params.def +@@ -344,6 +344,11 @@ + "sms-max-ii-factor", + "A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop", + 100, 0, 0) ++/* The minimum value of stage count that swing modulo scheduler will generate. 
*/ ++DEFPARAM(PARAM_SMS_MIN_SC, ++ "sms-min-sc", ++ "The minimum value of stage count that swing modulo scheduler will generate.", ++ 2, 1, 1) + DEFPARAM(PARAM_SMS_DFA_HISTORY, + "sms-dfa-history", + "The number of cycles the swing modulo scheduler considers when checking conflicts using DFA", +@@ -883,6 +888,13 @@ + "name lookup fails", + 1000, 0, 0) + ++/* Maximum number of conditional store pairs that can be sunk. */ ++DEFPARAM (PARAM_MAX_STORES_TO_SINK, ++ "max-stores-to-sink", ++ "Maximum number of conditional store pairs that can be sunk", ++ 2, 0, 0) + -+ 1, 0xc001c0de0000ll, -+ 20, 0x1000e0de0000ll, + -+ 0x300000007ll , 0x500000009ll, -+ 0xf100ff0001ll, ~0xa00000007ll, + /* + Local variables: + mode:c +--- a/src/gcc/params.h ++++ b/src/gcc/params.h +@@ -206,4 +206,6 @@ + PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO) + #define MIN_NONDEBUG_INSN_UID \ + PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID) ++#define MAX_STORES_TO_SINK \ ++ PARAM_VALUE (PARAM_MAX_STORES_TO_SINK) + #endif /* ! GCC_PARAMS_H */ +--- a/src/gcc/recog.c ++++ b/src/gcc/recog.c +@@ -930,7 +930,9 @@ + return ((GET_MODE (op) == VOIDmode || GET_MODE (op) == mode + || mode == VOIDmode) + && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) +- && LEGITIMATE_CONSTANT_P (op)); ++ && targetm.legitimate_constant_p (mode == VOIDmode ++ ? GET_MODE (op) ++ : mode, op)); + + /* Except for certain constants with VOIDmode, already checked for, + OP's mode must match MODE if MODE specifies a mode. */ +@@ -1107,7 +1109,9 @@ + && (GET_MODE (op) == mode || mode == VOIDmode + || GET_MODE (op) == VOIDmode) + && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op)) +- && LEGITIMATE_CONSTANT_P (op)); ++ && targetm.legitimate_constant_p (mode == VOIDmode ++ ? GET_MODE (op) ++ : mode, op)); + } + + /* Returns 1 if OP is an operand that is a CONST_INT. */ +--- a/src/gcc/regcprop.c ++++ b/src/gcc/regcprop.c +@@ -418,10 +418,9 @@ + + offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0) + + (BYTES_BIG_ENDIAN ? 
byteoffset : 0)); +- return gen_rtx_raw_REG (new_mode, +- regno + subreg_regno_offset (regno, orig_mode, +- offset, +- new_mode)); ++ regno += subreg_regno_offset (regno, orig_mode, offset, new_mode); ++ if (HARD_REGNO_MODE_OK (regno, new_mode)) ++ return gen_rtx_raw_REG (new_mode, regno); + } + return NULL_RTX; + } +--- a/src/gcc/reload1.c ++++ b/src/gcc/reload1.c +@@ -4159,6 +4159,9 @@ + } + else if (function_invariant_p (x)) + { ++ enum machine_mode mode; + -+ 1, 0xc001c0de0000ll, -+ 20, 0x1000e0de0000ll, ++ mode = GET_MODE (SET_DEST (set)); + if (GET_CODE (x) == PLUS) + { + /* This is PLUS of frame pointer and a constant, +@@ -4171,12 +4174,11 @@ + reg_equiv_invariant[i] = x; + num_eliminable_invariants++; + } +- else if (LEGITIMATE_CONSTANT_P (x)) ++ else if (targetm.legitimate_constant_p (mode, x)) + reg_equiv_constant[i] = x; + else + { +- reg_equiv_memory_loc[i] +- = force_const_mem (GET_MODE (SET_DEST (set)), x); ++ reg_equiv_memory_loc[i] = force_const_mem (mode, x); + if (! reg_equiv_memory_loc[i]) + reg_equiv_init[i] = NULL_RTX; + } +@@ -4478,6 +4480,43 @@ + } + } + } + -+ 0x300000007ll , 0x500000009ll, -+ 0xf100ff0001ll, ~0xa00000007ll }; ++/* *OP_PTR and *OTHER_PTR are two operands to a conceptual reload. ++ If *OP_PTR is a paradoxical subreg, try to remove that subreg ++ and apply the corresponding narrowing subreg to *OTHER_PTR. ++ Return true if the operands were changed, false otherwise. */ + -+/* First check they work in terms of what they do to memory. */ -+static void -+do_noret_di (void) ++static bool ++strip_paradoxical_subreg (rtx *op_ptr, rtx *other_ptr) +{ -+ __sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll); -+ __sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll); -+ __sync_lock_test_and_set (AL+2, 1); -+ __sync_lock_release (AL+3); ++ rtx op, inner, other, tem; + -+ /* The following tests should not change the value since the -+ original does NOT match. 
*/ -+ __sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll); -+ __sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll); -+ __sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll); -+ __sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll); ++ op = *op_ptr; ++ if (GET_CODE (op) != SUBREG) ++ return false; + -+ __sync_fetch_and_add (AL+8, 1); -+ __sync_fetch_and_add (AL+9, 0xb000e0000000ll); /* + to both halves & carry. */ -+ __sync_fetch_and_sub (AL+10, 22); -+ __sync_fetch_and_sub (AL+11, 0xb000e0000000ll); ++ inner = SUBREG_REG (op); ++ if (GET_MODE_SIZE (GET_MODE (op)) <= GET_MODE_SIZE (GET_MODE (inner))) ++ return false; + -+ __sync_fetch_and_and (AL+12, 0x300000007ll); -+ __sync_fetch_and_or (AL+13, 0x500000009ll); -+ __sync_fetch_and_xor (AL+14, 0xe00000001ll); -+ __sync_fetch_and_nand (AL+15, 0xa00000007ll); ++ other = *other_ptr; ++ tem = gen_lowpart_common (GET_MODE (inner), other); ++ if (!tem) ++ return false; + -+ /* These should be the same as the fetch_and_* cases except for -+ return value. */ -+ __sync_add_and_fetch (AL+16, 1); -+ /* add to both halves & carry. */ -+ __sync_add_and_fetch (AL+17, 0xb000e0000000ll); -+ __sync_sub_and_fetch (AL+18, 22); -+ __sync_sub_and_fetch (AL+19, 0xb000e0000000ll); ++ /* If the lowpart operation turned a hard register into a subreg, ++ rather than simplifying it to another hard register, then the ++ mode change cannot be properly represented. For example, OTHER ++ might be valid in its current mode, but not in the new one. */ ++ if (GET_CODE (tem) == SUBREG ++ && REG_P (other) ++ && HARD_REGISTER_P (other)) ++ return false; + -+ __sync_and_and_fetch (AL+20, 0x300000007ll); -+ __sync_or_and_fetch (AL+21, 0x500000009ll); -+ __sync_xor_and_fetch (AL+22, 0xe00000001ll); -+ __sync_nand_and_fetch (AL+23, 0xa00000007ll); ++ *op_ptr = inner; ++ *other_ptr = tem; ++ return true; +} + + /* A subroutine of reload_as_needed. 
If INSN has a REG_EH_REGION note, + examine all of the reload insns between PREV and NEXT exclusive, and +@@ -5558,7 +5597,7 @@ + chain reloads or do need an intermediate hard registers. */ + bool result = true; + int regno, n, code; +- rtx out, in, tem, insn; ++ rtx out, in, insn; + rtx last = get_last_insn (); + + /* Make r2 a component of r1. */ +@@ -5577,11 +5616,7 @@ + + /* If IN is a paradoxical SUBREG, remove it and try to put the + opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ +- if (GET_CODE (in) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (in)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) +- in = SUBREG_REG (in), out = tem; ++ strip_paradoxical_subreg (&in, &out); + + if (GET_CODE (in) == PLUS + && (REG_P (XEXP (in, 0)) +@@ -6453,6 +6488,8 @@ + + if (regno >= 0 + && reg_last_reload_reg[regno] != 0 ++ && (GET_MODE_SIZE (GET_MODE (reg_last_reload_reg[regno])) ++ >= GET_MODE_SIZE (mode) + byte) + #ifdef CANNOT_CHANGE_MODE_CLASS + /* Verify that the register it's in can be used in + mode MODE. */ +@@ -6464,24 +6501,12 @@ + { + enum reg_class rclass = rld[r].rclass, last_class; + rtx last_reg = reg_last_reload_reg[regno]; +- enum machine_mode need_mode; + + i = REGNO (last_reg); + i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode); + last_class = REGNO_REG_CLASS (i); + +- if (byte == 0) +- need_mode = mode; +- else +- need_mode +- = smallest_mode_for_size +- (GET_MODE_BITSIZE (mode) + byte * BITS_PER_UNIT, +- GET_MODE_CLASS (mode) == MODE_PARTIAL_INT +- ? 
MODE_INT : GET_MODE_CLASS (mode)); +- +- if ((GET_MODE_SIZE (GET_MODE (last_reg)) +- >= GET_MODE_SIZE (need_mode)) +- && reg_reloaded_contents[i] == regno ++ if (reg_reloaded_contents[i] == regno + && TEST_HARD_REG_BIT (reg_reloaded_valid, i) + && HARD_REGNO_MODE_OK (i, rld[r].mode) + && (TEST_HARD_REG_BIT (reg_class_contents[(int) rclass], i) +@@ -7583,7 +7608,6 @@ + if (tertiary_icode != CODE_FOR_nothing) + { + rtx third_reloadreg = rld[tertiary_reload].reg_rtx; +- rtx tem; + + /* Copy primary reload reg to secondary reload reg. + (Note that these have been swapped above, then +@@ -7592,13 +7616,7 @@ + /* If REAL_OLD is a paradoxical SUBREG, remove it + and try to put the opposite SUBREG on + RELOADREG. */ +- if (GET_CODE (real_old) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (real_old)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (real_old)))) +- && 0 != (tem = gen_lowpart_common +- (GET_MODE (SUBREG_REG (real_old)), +- reloadreg))) +- real_old = SUBREG_REG (real_old), reloadreg = tem; ++ strip_paradoxical_subreg (&real_old, &reloadreg); + + gen_reload (reloadreg, second_reloadreg, + rl->opnum, rl->when_needed); +@@ -8414,16 +8432,8 @@ + + /* If IN is a paradoxical SUBREG, remove it and try to put the + opposite SUBREG on OUT. Likewise for a paradoxical SUBREG on OUT. */ +- if (GET_CODE (in) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (in)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0) +- in = SUBREG_REG (in), out = tem; +- else if (GET_CODE (out) == SUBREG +- && (GET_MODE_SIZE (GET_MODE (out)) +- > GET_MODE_SIZE (GET_MODE (SUBREG_REG (out)))) +- && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (out)), in)) != 0) +- out = SUBREG_REG (out), in = tem; ++ if (!strip_paradoxical_subreg (&in, &out)) ++ strip_paradoxical_subreg (&out, &in); + + /* How to do this reload can get quite tricky. 
Normally, we are being + asked to reload a simple operand, such as a MEM, a constant, or a pseudo +--- a/src/gcc/reload.c ++++ b/src/gcc/reload.c +@@ -1017,6 +1017,7 @@ + #ifdef CANNOT_CHANGE_MODE_CLASS + && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass) + #endif ++ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))] + && (CONSTANT_P (SUBREG_REG (in)) + || GET_CODE (SUBREG_REG (in)) == PLUS + || strict_low +@@ -1123,6 +1124,7 @@ + #ifdef CANNOT_CHANGE_MODE_CLASS + && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode, rclass) + #endif ++ && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))] + && (CONSTANT_P (SUBREG_REG (out)) + || strict_low + || (((REG_P (SUBREG_REG (out)) +@@ -4721,7 +4723,8 @@ + simplify_gen_subreg (GET_MODE (x), reg_equiv_constant[regno], + GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); + gcc_assert (tem); +- if (CONSTANT_P (tem) && !LEGITIMATE_CONSTANT_P (tem)) ++ if (CONSTANT_P (tem) ++ && !targetm.legitimate_constant_p (GET_MODE (x), tem)) + { + tem = force_const_mem (GET_MODE (x), tem); + i = find_reloads_address (GET_MODE (tem), &tem, XEXP (tem, 0), +@@ -6049,7 +6052,7 @@ + enum reload_type type, int ind_levels) + { + if (CONSTANT_P (x) +- && (! LEGITIMATE_CONSTANT_P (x) ++ && (!targetm.legitimate_constant_p (mode, x) + || targetm.preferred_reload_class (x, rclass) == NO_REGS)) + { + x = force_const_mem (mode, x); +@@ -6059,7 +6062,7 @@ + + else if (GET_CODE (x) == PLUS + && CONSTANT_P (XEXP (x, 1)) +- && (! 
LEGITIMATE_CONSTANT_P (XEXP (x, 1)) ++ && (!targetm.legitimate_constant_p (GET_MODE (x), XEXP (x, 1)) + || targetm.preferred_reload_class (XEXP (x, 1), rclass) + == NO_REGS)) + { +--- a/src/gcc/sched-deps.c ++++ b/src/gcc/sched-deps.c +@@ -450,7 +450,7 @@ + static void add_dependence_list_and_free (struct deps_desc *, rtx, + rtx *, int, enum reg_note); + static void delete_all_dependences (rtx); +-static void fixup_sched_groups (rtx); ++static void chain_to_prev_insn (rtx); + + static void flush_pending_lists (struct deps_desc *, rtx, int, int); + static void sched_analyze_1 (struct deps_desc *, rtx, rtx); +@@ -1490,7 +1490,7 @@ + the previous nonnote insn. */ + + static void +-fixup_sched_groups (rtx insn) ++chain_to_prev_insn (rtx insn) + { + sd_iterator_def sd_it; + dep_t dep; +@@ -1999,7 +1999,7 @@ + static struct reg_pressure_data *pressure_info; + rtx link; + +- gcc_assert (sched_pressure_p); ++ gcc_assert (sched_pressure != SCHED_PRESSURE_NONE); + + if (! INSN_P (insn)) + return; +@@ -2030,8 +2030,9 @@ + len = sizeof (struct reg_pressure_data) * ira_reg_class_cover_size; + pressure_info + = INSN_REG_PRESSURE (insn) = (struct reg_pressure_data *) xmalloc (len); +- INSN_MAX_REG_PRESSURE (insn) = (int *) xcalloc (ira_reg_class_cover_size +- * sizeof (int), 1); ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) ++ INSN_MAX_REG_PRESSURE (insn) = (int *) xcalloc (ira_reg_class_cover_size ++ * sizeof (int), 1); + for (i = 0; i < ira_reg_class_cover_size; i++) + { + cl = ira_reg_class_cover[i]; +@@ -2775,7 +2776,7 @@ + || (NONJUMP_INSN_P (insn) && control_flow_insn_p (insn))) + reg_pending_barrier = MOVE_BARRIER; + +- if (sched_pressure_p) ++ if (sched_pressure != SCHED_PRESSURE_NONE) + { + setup_insn_reg_uses (deps, insn); + setup_insn_reg_pressure_info (insn); +@@ -3076,7 +3077,7 @@ + instructions that follow seem like they should be part + of the call group. 
+ +- Also, if we did, fixup_sched_groups() would move the ++ Also, if we did, chain_to_prev_insn would move the + deps of the debug insn to the call insn, modifying + non-debug post-dependency counts of the debug insn + dependencies and otherwise messing with the scheduling +@@ -3222,6 +3223,37 @@ + return true; + } + ++/* Return true if INSN should be made dependent on the previous instruction ++ group, and if all INSN's dependencies should be moved to the first ++ instruction of that group. */ + -+/* Now check return values. */ -+static void -+do_ret_di (void) ++static bool ++chain_to_prev_insn_p (rtx insn) +{ -+ if (__sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll) != -+ 0x100000002ll) abort (); -+ if (__sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll) != -+ 1) abort (); -+ if (__sync_lock_test_and_set (AL+2, 1) != 0) abort (); -+ __sync_lock_release (AL+3); /* no return value, but keep to match results. */ -+ -+ /* The following tests should not change the value since the -+ original does NOT match. 
*/ -+ if (__sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll) != -+ 0x100000002ll) abort (); -+ if (__sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll) != -+ 0x100000002ll) abort (); -+ if (__sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll) != -+ 0) abort (); -+ if (__sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll) != -+ 0) abort (); -+ -+ if (__sync_fetch_and_add (AL+8, 1) != 0) abort (); -+ if (__sync_fetch_and_add (AL+9, 0xb000e0000000ll) != 0x1000e0de0000ll) abort (); -+ if (__sync_fetch_and_sub (AL+10, 22) != 42) abort (); -+ if (__sync_fetch_and_sub (AL+11, 0xb000e0000000ll) != 0xc001c0de0000ll) -+ abort (); ++ rtx prev, x; + -+ if (__sync_fetch_and_and (AL+12, 0x300000007ll) != -1ll) abort (); -+ if (__sync_fetch_and_or (AL+13, 0x500000009ll) != 0) abort (); -+ if (__sync_fetch_and_xor (AL+14, 0xe00000001ll) != 0xff00ff0000ll) abort (); -+ if (__sync_fetch_and_nand (AL+15, 0xa00000007ll) != -1ll) abort (); ++ /* INSN forms a group with the previous instruction. */ ++ if (SCHED_GROUP_P (insn)) ++ return true; + -+ /* These should be the same as the fetch_and_* cases except for -+ return value. */ -+ if (__sync_add_and_fetch (AL+16, 1) != 1) abort (); -+ if (__sync_add_and_fetch (AL+17, 0xb000e0000000ll) != 0xc001c0de0000ll) -+ abort (); -+ if (__sync_sub_and_fetch (AL+18, 22) != 20) abort (); -+ if (__sync_sub_and_fetch (AL+19, 0xb000e0000000ll) != 0x1000e0de0000ll) -+ abort (); ++ /* If the previous instruction clobbers a register R and this one sets ++ part of R, the clobber was added specifically to help us track the ++ liveness of R. There's no point scheduling the clobber and leaving ++ INSN behind, especially if we move the clobber to another block. 
*/ ++ prev = prev_nonnote_nondebug_insn (insn); ++ if (prev ++ && INSN_P (prev) ++ && BLOCK_FOR_INSN (prev) == BLOCK_FOR_INSN (insn) ++ && GET_CODE (PATTERN (prev)) == CLOBBER) ++ { ++ x = XEXP (PATTERN (prev), 0); ++ if (set_of (x, insn)) ++ return true; ++ } + -+ if (__sync_and_and_fetch (AL+20, 0x300000007ll) != 0x300000007ll) abort (); -+ if (__sync_or_and_fetch (AL+21, 0x500000009ll) != 0x500000009ll) abort (); -+ if (__sync_xor_and_fetch (AL+22, 0xe00000001ll) != 0xf100ff0001ll) abort (); -+ if (__sync_nand_and_fetch (AL+23, 0xa00000007ll) != ~0xa00000007ll) abort (); ++ return false; +} + -+int main () -+{ -+ memcpy (AL, init_di, sizeof (init_di)); -+ -+ do_noret_di (); -+ -+ if (memcmp (AL, test_di, sizeof (test_di))) -+ abort (); -+ -+ memcpy (AL, init_di, sizeof (init_di)); -+ -+ do_ret_di (); -+ -+ if (memcmp (AL, test_di, sizeof (test_di))) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.dg/di-sync-multithread.c -+++ b/src/gcc/testsuite/gcc.dg/di-sync-multithread.c -@@ -0,0 +1,205 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target sync_longlong } */ -+/* { dg-require-effective-target pthread_h } */ -+/* { dg-require-effective-target pthread } */ -+/* { dg-options "-pthread -std=gnu99" } */ + /* Analyze INSN with DEPS as a context. */ + void + deps_analyze_insn (struct deps_desc *deps, rtx insn) +@@ -3358,8 +3390,9 @@ + + /* Fixup the dependencies in the sched group. */ + if ((NONJUMP_INSN_P (insn) || JUMP_P (insn)) +- && SCHED_GROUP_P (insn) && !sel_sched_p ()) +- fixup_sched_groups (insn); ++ && chain_to_prev_insn_p (insn) ++ && !sel_sched_p ()) ++ chain_to_prev_insn (insn); + } + + /* Initialize DEPS for the new block beginning with HEAD. */ +--- a/src/gcc/sched-int.h ++++ b/src/gcc/sched-int.h +@@ -651,7 +651,7 @@ + + /* Do register pressure sensitive insn scheduling if the flag is set + up. */ +-extern bool sched_pressure_p; ++extern enum sched_pressure_algorithm sched_pressure; + + /* Map regno -> its cover class. 
The map defined only when + SCHED_PRESSURE_P is true. */ +@@ -773,16 +773,16 @@ + + short cost; + +- /* Set if there's DEF-USE dependence between some speculatively +- moved load insn and this one. */ +- unsigned int fed_by_spec_load : 1; +- unsigned int is_load_insn : 1; +- + /* '> 0' if priority is valid, + '== 0' if priority was not yet computed, + '< 0' if priority in invalid and should be recomputed. */ + signed char priority_status; + ++ /* Set if there's DEF-USE dependence between some speculatively ++ moved load insn and this one. */ ++ unsigned int fed_by_spec_load : 1; ++ unsigned int is_load_insn : 1; ++ + /* What speculations are necessary to apply to schedule the instruction. */ + ds_t todo_spec; + +@@ -817,6 +817,7 @@ + /* Info about how scheduling the insn changes cost of register + pressure excess (between source and target). */ + int reg_pressure_excess_cost_change; ++ int model_index; + }; + + typedef struct _haifa_insn_data haifa_insn_data_def; +@@ -839,6 +840,7 @@ + #define INSN_REG_PRESSURE_EXCESS_COST_CHANGE(INSN) \ + (HID (INSN)->reg_pressure_excess_cost_change) + #define INSN_PRIORITY_STATUS(INSN) (HID (INSN)->priority_status) ++#define INSN_MODEL_INDEX(INSN) (HID (INSN)->model_index) + + typedef struct _haifa_deps_insn_data haifa_deps_insn_data_def; + typedef haifa_deps_insn_data_def *haifa_deps_insn_data_t; +--- a/src/gcc/sched-rgn.c ++++ b/src/gcc/sched-rgn.c +@@ -2943,7 +2943,7 @@ + + sched_extend_ready_list (rgn_n_insns); + +- if (sched_pressure_p) ++ if (sched_pressure == SCHED_PRESSURE_WEIGHTED) + { + sched_init_region_reg_pressure_info (); + for (bb = 0; bb < current_nr_blocks; bb++) +--- a/src/gcc/simplify-rtx.c ++++ b/src/gcc/simplify-rtx.c +@@ -1001,6 +1001,48 @@ + && GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF) + return XEXP (op, 0); + ++ /* Extending a widening multiplication should be canonicalized to ++ a wider widening multiplication. 
*/ ++ if (GET_CODE (op) == MULT) ++ { ++ rtx lhs = XEXP (op, 0); ++ rtx rhs = XEXP (op, 1); ++ enum rtx_code lcode = GET_CODE (lhs); ++ enum rtx_code rcode = GET_CODE (rhs); + -+/* test of long long atomic ops performed in parallel in 3 pthreads -+ david.gilbert@linaro.org */ ++ /* Widening multiplies usually extend both operands, but sometimes ++ they use a shift to extract a portion of a register. */ ++ if ((lcode == SIGN_EXTEND ++ || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1)))) ++ && (rcode == SIGN_EXTEND ++ || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1))))) ++ { ++ enum machine_mode lmode = GET_MODE (lhs); ++ enum machine_mode rmode = GET_MODE (rhs); ++ int bits; + -+#include -+#include ++ if (lcode == ASHIFTRT) ++ /* Number of bits not shifted off the end. */ ++ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1)); ++ else /* lcode == SIGN_EXTEND */ ++ /* Size of inner mode. */ ++ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0))); + -+/*#define DEBUGIT 1 */ ++ if (rcode == ASHIFTRT) ++ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1)); ++ else /* rcode == SIGN_EXTEND */ ++ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0))); + -+#ifdef DEBUGIT -+#include ++ /* We can only widen multiplies if the result is mathematiclly ++ equivalent. I.e. if overflow was impossible. */ ++ if (bits <= GET_MODE_PRECISION (GET_MODE (op))) ++ return simplify_gen_binary ++ (MULT, mode, ++ simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode), ++ simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode)); ++ } ++ } + -+#define DOABORT(x,...) {\ -+ fprintf (stderr, x, __VA_ARGS__); fflush (stderr); abort ();\ -+ } + /* Check for a sign extension of a subreg of a promoted + variable, where the promotion is sign-extended, and the + target mode is the same as the variable's promotion. 
*/ +@@ -1072,6 +1114,48 @@ + && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))) + return rtl_hooks.gen_lowpart_no_emit (mode, op); + ++ /* Extending a widening multiplication should be canonicalized to ++ a wider widening multiplication. */ ++ if (GET_CODE (op) == MULT) ++ { ++ rtx lhs = XEXP (op, 0); ++ rtx rhs = XEXP (op, 1); ++ enum rtx_code lcode = GET_CODE (lhs); ++ enum rtx_code rcode = GET_CODE (rhs); + -+#else ++ /* Widening multiplies usually extend both operands, but sometimes ++ they use a shift to extract a portion of a register. */ ++ if ((lcode == ZERO_EXTEND ++ || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1)))) ++ && (rcode == ZERO_EXTEND ++ || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1))))) ++ { ++ enum machine_mode lmode = GET_MODE (lhs); ++ enum machine_mode rmode = GET_MODE (rhs); ++ int bits; + -+#define DOABORT(x,...) abort (); ++ if (lcode == LSHIFTRT) ++ /* Number of bits not shifted off the end. */ ++ bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1)); ++ else /* lcode == ZERO_EXTEND */ ++ /* Size of inner mode. */ ++ bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0))); + -+#endif ++ if (rcode == LSHIFTRT) ++ bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1)); ++ else /* rcode == ZERO_EXTEND */ ++ bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0))); + -+/* Passed to each thread to describe which bits it is going to work on. */ -+struct threadwork { -+ unsigned long long count; /* incremented each time the worker loops. */ -+ unsigned int thread; /* ID */ -+ unsigned int addlsb; /* 8 bit */ -+ unsigned int logic1lsb; /* 5 bit */ -+ unsigned int logic2lsb; /* 8 bit */ -+}; ++ /* We can only widen multiplies if the result is mathematiclly ++ equivalent. I.e. if overflow was impossible. 
*/ ++ if (bits <= GET_MODE_PRECISION (GET_MODE (op))) ++ return simplify_gen_binary ++ (MULT, mode, ++ simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode), ++ simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode)); ++ } ++ } + -+/* The shared word where all the atomic work is done. */ -+static volatile long long workspace; + /* (zero_extend:M (zero_extend:N )) is (zero_extend:M ). */ + if (GET_CODE (op) == ZERO_EXTEND) + return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0), +@@ -2507,6 +2591,46 @@ + XEXP (op0, 1), mode), + op1); + ++ /* Given (xor (and A B) C), using P^Q == (~P&Q) | (~Q&P), ++ we can transform like this: ++ (A&B)^C == ~(A&B)&C | ~C&(A&B) ++ == (~A|~B)&C | ~C&(A&B) * DeMorgan's Law ++ == ~A&C | ~B&C | A&(~C&B) * Distribute and re-order ++ Attempt a few simplifications when B and C are both constants. */ ++ if (GET_CODE (op0) == AND ++ && CONST_INT_P (op1) ++ && CONST_INT_P (XEXP (op0, 1))) ++ { ++ rtx a = XEXP (op0, 0); ++ rtx b = XEXP (op0, 1); ++ rtx c = op1; ++ HOST_WIDE_INT bval = INTVAL (b); ++ HOST_WIDE_INT cval = INTVAL (c); + -+/* A shared word to tell the workers to quit when non-0. */ -+static long long doquit; ++ rtx na_c ++ = simplify_binary_operation (AND, mode, ++ simplify_gen_unary (NOT, mode, a, mode), ++ c); ++ if ((~cval & bval) == 0) ++ { ++ /* Try to simplify ~A&C | ~B&C. */ ++ if (na_c != NULL_RTX) ++ return simplify_gen_binary (IOR, mode, na_c, ++ GEN_INT (~bval & cval)); ++ } ++ else ++ { ++ /* If ~A&C is zero, simplify A&(~C&B) | ~B&C. */ ++ if (na_c == const0_rtx) ++ { ++ rtx a_nc_b = simplify_gen_binary (AND, mode, a, ++ GEN_INT (~cval & bval)); ++ return simplify_gen_binary (IOR, mode, a_nc_b, ++ GEN_INT (~bval & cval)); ++ } ++ } ++ } + -+extern void abort (void); + /* (xor (comparison foo bar) (const_int 1)) can become the reversed + comparison if STORE_FLAG_VALUE is 1. */ + if (STORE_FLAG_VALUE == 1 +@@ -5454,6 +5578,7 @@ + /* Optimize SUBREG truncations of zero and sign extended values. 
*/ + if ((GET_CODE (op) == ZERO_EXTEND + || GET_CODE (op) == SIGN_EXTEND) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)) + { + unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte); +@@ -5492,6 +5617,7 @@ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE + to avoid the possibility that an outer LSHIFTRT shifts by more + than the sign extension's sign_bit_copies and introduces zeros +@@ -5511,6 +5637,7 @@ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) + && CONST_INT_P (XEXP (op, 1)) + && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND +@@ -5525,6 +5652,7 @@ + the outer subreg is effectively a truncation to the original mode. */ + if (GET_CODE (op) == ASHIFT + && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode) + && CONST_INT_P (XEXP (op, 1)) + && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND +@@ -5538,7 +5666,7 @@ + /* Recognize a word extraction from a multi-word subreg. 
*/ + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) +- && SCALAR_INT_MODE_P (outermode) ++ && SCALAR_INT_MODE_P (innermode) + && GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD + && GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode)) + && CONST_INT_P (XEXP (op, 1)) +@@ -5560,6 +5688,7 @@ + + if ((GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ASHIFTRT) ++ && SCALAR_INT_MODE_P (innermode) + && MEM_P (XEXP (op, 0)) + && CONST_INT_P (XEXP (op, 1)) + && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op)) +--- a/src/gcc/stmt.c ++++ b/src/gcc/stmt.c +@@ -1683,119 +1683,21 @@ + expand_value_return (result_rtl); + + /* If the result is an aggregate that is being returned in one (or more) +- registers, load the registers here. The compiler currently can't handle +- copying a BLKmode value into registers. We could put this code in a +- more general area (for use by everyone instead of just function +- call/return), but until this feature is generally usable it is kept here +- (and in expand_call). */ ++ registers, load the registers here. */ + + else if (retval_rhs != 0 + && TYPE_MODE (TREE_TYPE (retval_rhs)) == BLKmode + && REG_P (result_rtl)) + { +- int i; +- unsigned HOST_WIDE_INT bitpos, xbitpos; +- unsigned HOST_WIDE_INT padding_correction = 0; +- unsigned HOST_WIDE_INT bytes +- = int_size_in_bytes (TREE_TYPE (retval_rhs)); +- int n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; +- unsigned int bitsize +- = MIN (TYPE_ALIGN (TREE_TYPE (retval_rhs)), BITS_PER_WORD); +- rtx *result_pseudos = XALLOCAVEC (rtx, n_regs); +- rtx result_reg, src = NULL_RTX, dst = NULL_RTX; +- rtx result_val = expand_normal (retval_rhs); +- enum machine_mode tmpmode, result_reg_mode; +- +- if (bytes == 0) +- { +- expand_null_return (); +- return; +- } +- +- /* If the structure doesn't take up a whole number of words, see +- whether the register value should be padded on the left or on +- the right. 
Set PADDING_CORRECTION to the number of padding +- bits needed on the left side. +- +- In most ABIs, the structure will be returned at the least end of +- the register, which translates to right padding on little-endian +- targets and left padding on big-endian targets. The opposite +- holds if the structure is returned at the most significant +- end of the register. */ +- if (bytes % UNITS_PER_WORD != 0 +- && (targetm.calls.return_in_msb (TREE_TYPE (retval_rhs)) +- ? !BYTES_BIG_ENDIAN +- : BYTES_BIG_ENDIAN)) +- padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD) +- * BITS_PER_UNIT)); +- +- /* Copy the structure BITSIZE bits at a time. */ +- for (bitpos = 0, xbitpos = padding_correction; +- bitpos < bytes * BITS_PER_UNIT; +- bitpos += bitsize, xbitpos += bitsize) +- { +- /* We need a new destination pseudo each time xbitpos is +- on a word boundary and when xbitpos == padding_correction +- (the first time through). */ +- if (xbitpos % BITS_PER_WORD == 0 +- || xbitpos == padding_correction) +- { +- /* Generate an appropriate register. */ +- dst = gen_reg_rtx (word_mode); +- result_pseudos[xbitpos / BITS_PER_WORD] = dst; +- +- /* Clear the destination before we move anything into it. */ +- emit_move_insn (dst, CONST0_RTX (GET_MODE (dst))); +- } +- +- /* We need a new source operand each time bitpos is on a word +- boundary. */ +- if (bitpos % BITS_PER_WORD == 0) +- src = operand_subword_force (result_val, +- bitpos / BITS_PER_WORD, +- BLKmode); +- +- /* Use bitpos for the source extraction (left justified) and +- xbitpos for the destination store (right justified). 
*/ +- store_bit_field (dst, bitsize, xbitpos % BITS_PER_WORD, word_mode, +- extract_bit_field (src, bitsize, +- bitpos % BITS_PER_WORD, 1, false, +- NULL_RTX, word_mode, word_mode)); +- } +- +- tmpmode = GET_MODE (result_rtl); +- if (tmpmode == BLKmode) ++ val = copy_blkmode_to_reg (GET_MODE (result_rtl), retval_rhs); ++ if (val) + { +- /* Find the smallest integer mode large enough to hold the +- entire structure and use that mode instead of BLKmode +- on the USE insn for the return register. */ +- for (tmpmode = GET_CLASS_NARROWEST_MODE (MODE_INT); +- tmpmode != VOIDmode; +- tmpmode = GET_MODE_WIDER_MODE (tmpmode)) +- /* Have we found a large enough mode? */ +- if (GET_MODE_SIZE (tmpmode) >= bytes) +- break; +- +- /* A suitable mode should have been found. */ +- gcc_assert (tmpmode != VOIDmode); +- +- PUT_MODE (result_rtl, tmpmode); ++ /* Use the mode of the result value on the return register. */ ++ PUT_MODE (result_rtl, GET_MODE (val)); ++ expand_value_return (val); + } +- +- if (GET_MODE_SIZE (tmpmode) < GET_MODE_SIZE (word_mode)) +- result_reg_mode = word_mode; + else +- result_reg_mode = tmpmode; +- result_reg = gen_reg_rtx (result_reg_mode); +- +- for (i = 0; i < n_regs; i++) +- emit_move_insn (operand_subword (result_reg, i, 0, result_reg_mode), +- result_pseudos[i]); +- +- if (tmpmode != result_reg_mode) +- result_reg = gen_lowpart (tmpmode, result_reg); +- +- expand_value_return (result_reg); ++ expand_null_return (); + } + else if (retval_rhs != 0 + && !VOID_TYPE_P (TREE_TYPE (retval_rhs)) +--- a/src/gcc/stor-layout.c ++++ b/src/gcc/stor-layout.c +@@ -546,6 +546,34 @@ + return MIN (BIGGEST_ALIGNMENT, MAX (1, mode_base_align[mode]*BITS_PER_UNIT)); + } + ++/* Return the natural mode of an array, given that it is SIZE bytes in ++ total and has elements of type ELEM_TYPE. */ + -+/* Note this test doesn't test the return values much. 
*/ -+void* -+worker (void* data) ++static enum machine_mode ++mode_for_array (tree elem_type, tree size) +{ -+ struct threadwork *tw = (struct threadwork*)data; -+ long long add1bit = 1ll << tw->addlsb; -+ long long logic1bit = 1ll << tw->logic1lsb; -+ long long logic2bit = 1ll << tw->logic2lsb; ++ tree elem_size; ++ unsigned HOST_WIDE_INT int_size, int_elem_size; ++ bool limit_p; + -+ /* Clear the bits we use. */ -+ __sync_and_and_fetch (&workspace, ~(0xffll * add1bit)); -+ __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit)); -+ __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit)); ++ /* One-element arrays get the component type's mode. */ ++ elem_size = TYPE_SIZE (elem_type); ++ if (simple_cst_equal (size, elem_size)) ++ return TYPE_MODE (elem_type); + -+ do ++ limit_p = true; ++ if (host_integerp (size, 1) && host_integerp (elem_size, 1)) + { -+ long long tmp1, tmp2, tmp3; -+ /* OK, lets try and do some stuff to the workspace - by the end -+ of the main loop our area should be the same as it is now - i.e. 0. */ -+ -+ /* Push the arithmetic section upto 128 - one of the threads will -+ case this to carry accross the 32bit boundary. */ -+ for (tmp2 = 0; tmp2 < 64; tmp2++) -+ { -+ /* Add 2 using the two different adds. */ -+ tmp1 = __sync_add_and_fetch (&workspace, add1bit); -+ tmp3 = __sync_fetch_and_add (&workspace, add1bit); -+ -+ /* The value should be the intermediate add value in both cases. */ -+ if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff))) -+ DOABORT ("Mismatch of add intermediates on thread %d " -+ "workspace=0x%llx tmp1=0x%llx " -+ "tmp2=0x%llx tmp3=0x%llx\n", -+ tw->thread, workspace, tmp1, tmp2, tmp3); -+ } -+ -+ /* Set the logic bits. */ -+ tmp2=__sync_or_and_fetch (&workspace, -+ 0x1fll * logic1bit | 0xffll * logic2bit); -+ -+ /* Check the logic bits are set and the arithmetic value is correct. 
*/ -+ if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit ++ int_size = tree_low_cst (size, 1); ++ int_elem_size = tree_low_cst (elem_size, 1); ++ if (int_elem_size > 0 ++ && int_size % int_elem_size == 0 ++ && targetm.array_mode_supported_p (TYPE_MODE (elem_type), ++ int_size / int_elem_size)) ++ limit_p = false; ++ } ++ return mode_for_size_tree (size, MODE_INT, limit_p); ++} + + /* Subroutine of layout_decl: Force alignment required for the data type. + But if the decl itself wants greater alignment, don't override that. */ +@@ -2048,14 +2076,8 @@ + && (TYPE_MODE (TREE_TYPE (type)) != BLKmode + || TYPE_NO_FORCE_BLK (TREE_TYPE (type)))) + { +- /* One-element arrays get the component type's mode. */ +- if (simple_cst_equal (TYPE_SIZE (type), +- TYPE_SIZE (TREE_TYPE (type)))) +- SET_TYPE_MODE (type, TYPE_MODE (TREE_TYPE (type))); +- else +- SET_TYPE_MODE (type, mode_for_size_tree (TYPE_SIZE (type), +- MODE_INT, 1)); +- ++ SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type), ++ TYPE_SIZE (type))); + if (TYPE_MODE (type) != BLKmode + && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT + && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type))) +--- a/src/gcc/target.def ++++ b/src/gcc/target.def +@@ -1344,6 +1344,13 @@ + unsigned, (unsigned nunroll, struct loop *loop), + NULL) + ++/* True if X is a legitimate MODE-mode immediate operand. */ ++DEFHOOK ++(legitimate_constant_p, ++ "", ++ bool, (enum machine_mode mode, rtx x), ++ default_legitimate_constant_p) ++ + /* True if the constant X cannot be placed in the constant pool. */ + DEFHOOK + (cannot_force_const_mem, +@@ -1621,6 +1628,38 @@ + HOST_WIDE_INT, (const_tree type), + default_vector_alignment) + ++/* True if we should try to use a scalar mode to represent an array, ++ overriding the usual MAX_FIXED_MODE limit. 
*/ ++DEFHOOK ++(array_mode_supported_p, ++ "Return true if GCC should try to use a scalar mode to store an array\n\ ++of @var{nelems} elements, given that each element has mode @var{mode}.\n\ ++Returning true here overrides the usual @code{MAX_FIXED_MODE} limit\n\ ++and allows GCC to use any defined integer mode.\n\ ++\n\ ++One use of this hook is to support vector load and store operations\n\ ++that operate on several homogeneous vectors. For example, ARM NEON\n\ ++has operations like:\n\ ++\n\ ++@smallexample\n\ ++int8x8x3_t vld3_s8 (const int8_t *)\n\ ++@end smallexample\n\ ++\n\ ++where the return type is defined as:\n\ ++\n\ ++@smallexample\n\ ++typedef struct int8x8x3_t\n\ ++@{\n\ ++ int8x8_t val[3];\n\ ++@} int8x8x3_t;\n\ ++@end smallexample\n\ ++\n\ ++If this hook allows @code{val} to have a scalar mode, then\n\ ++@code{int8x8x3_t} can have the same mode. GCC can then store\n\ ++@code{int8x8x3_t}s in registers rather than forcing them onto the stack.", ++ bool, (enum machine_mode mode, unsigned HOST_WIDE_INT nelems), ++ hook_bool_mode_uhwi_false) ++ + /* Compute cost of moving data from a register of class FROM to one of + TO, using MODE. 
*/ + DEFHOOK +--- a/src/gcc/targhooks.c ++++ b/src/gcc/targhooks.c +@@ -1527,4 +1527,15 @@ + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + ++bool ++default_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, ++ rtx x ATTRIBUTE_UNUSED) ++{ ++#ifdef LEGITIMATE_CONSTANT_P ++ return LEGITIMATE_CONSTANT_P (x); ++#else ++ return true; ++#endif ++} ++ + #include "gt-targhooks.h" +--- a/src/gcc/targhooks.h ++++ b/src/gcc/targhooks.h +@@ -185,3 +185,4 @@ + + extern void *default_get_pch_validity (size_t *); + extern const char *default_pch_valid_p (const void *, size_t); ++extern bool default_legitimate_constant_p (enum machine_mode, rtx); +--- a/src/gcc/testsuite/gcc.c-torture/compile/20110401-1.c ++++ b/src/gcc/testsuite/gcc.c-torture/compile/20110401-1.c +@@ -0,0 +1,22 @@ ++void asn1_length_der (unsigned long int len, unsigned char *ans, int *ans_len) ++{ ++ int k; ++ unsigned char temp[4]; ++ if (len < 128) { ++ if (ans != ((void *) 0)) ++ ans[0] = (unsigned char) len; ++ *ans_len = 1; ++ } else { ++ k = 0; ++ while (len) { ++ temp[k++] = len & 0xFF; ++ len = len >> 8; ++ } ++ *ans_len = k + 1; ++ if (ans != ((void *) 0)) { ++ ans[0] = ((unsigned char) k & 0x7F) + 128; ++ while (k--) ++ ans[*ans_len - 1 - k] = temp[k]; ++ } ++ } ++} +--- a/src/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c ++++ b/src/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c +@@ -0,0 +1,164 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sync_longlong } */ ++/* { dg-options "-std=gnu99" } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */ ++ ++ ++/* Test basic functionality of the intrinsics. The operations should ++ not be optimized away if no one checks the return values. 
*/ ++ ++/* Based on ia64-sync-[12].c, but 1) long on ARM is 32 bit so use long long ++ (an explicit 64bit type maybe a better bet) and 2) Use values that cross ++ the 32bit boundary and cause carries since the actual maths are done as ++ pairs of 32 bit instructions. */ ++ ++/* Note: This file is #included by some of the ARM tests. */ ++ ++__extension__ typedef __SIZE_TYPE__ size_t; ++ ++extern void abort (void); ++extern void *memcpy (void *, const void *, size_t); ++extern int memcmp (const void *, const void *, size_t); ++ ++/* Temporary space where the work actually gets done. */ ++static long long AL[24]; ++/* Values copied into AL before we start. */ ++static long long init_di[24] = { 0x100000002ll, 0x200000003ll, 0, 1, ++ ++ 0x100000002ll, 0x100000002ll, ++ 0x100000002ll, 0x100000002ll, ++ ++ 0, 0x1000e0de0000ll, ++ 42 , 0xc001c0de0000ll, ++ ++ -1ll, 0, 0xff00ff0000ll, -1ll, ++ ++ 0, 0x1000e0de0000ll, ++ 42 , 0xc001c0de0000ll, ++ ++ -1ll, 0, 0xff00ff0000ll, -1ll}; ++/* This is what should be in AL at the end. */ ++static long long test_di[24] = { 0x1234567890ll, 0x1234567890ll, 1, 0, ++ ++ 0x100000002ll, 0x100000002ll, ++ 0x100000002ll, 0x100000002ll, ++ ++ 1, 0xc001c0de0000ll, ++ 20, 0x1000e0de0000ll, ++ ++ 0x300000007ll , 0x500000009ll, ++ 0xf100ff0001ll, ~0xa00000007ll, ++ ++ 1, 0xc001c0de0000ll, ++ 20, 0x1000e0de0000ll, ++ ++ 0x300000007ll , 0x500000009ll, ++ 0xf100ff0001ll, ~0xa00000007ll }; ++ ++/* First check they work in terms of what they do to memory. */ ++static void ++do_noret_di (void) ++{ ++ __sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll); ++ __sync_lock_test_and_set (AL+2, 1); ++ __sync_lock_release (AL+3); ++ ++ /* The following tests should not change the value since the ++ original does NOT match. 
*/ ++ __sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll); ++ __sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll); ++ __sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll); ++ ++ __sync_fetch_and_add (AL+8, 1); ++ __sync_fetch_and_add (AL+9, 0xb000e0000000ll); /* + to both halves & carry. */ ++ __sync_fetch_and_sub (AL+10, 22); ++ __sync_fetch_and_sub (AL+11, 0xb000e0000000ll); ++ ++ __sync_fetch_and_and (AL+12, 0x300000007ll); ++ __sync_fetch_and_or (AL+13, 0x500000009ll); ++ __sync_fetch_and_xor (AL+14, 0xe00000001ll); ++ __sync_fetch_and_nand (AL+15, 0xa00000007ll); ++ ++ /* These should be the same as the fetch_and_* cases except for ++ return value. */ ++ __sync_add_and_fetch (AL+16, 1); ++ /* add to both halves & carry. */ ++ __sync_add_and_fetch (AL+17, 0xb000e0000000ll); ++ __sync_sub_and_fetch (AL+18, 22); ++ __sync_sub_and_fetch (AL+19, 0xb000e0000000ll); ++ ++ __sync_and_and_fetch (AL+20, 0x300000007ll); ++ __sync_or_and_fetch (AL+21, 0x500000009ll); ++ __sync_xor_and_fetch (AL+22, 0xe00000001ll); ++ __sync_nand_and_fetch (AL+23, 0xa00000007ll); ++} ++ ++/* Now check return values. */ ++static void ++do_ret_di (void) ++{ ++ if (__sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll) != ++ 1) abort (); ++ if (__sync_lock_test_and_set (AL+2, 1) != 0) abort (); ++ __sync_lock_release (AL+3); /* no return value, but keep to match results. */ ++ ++ /* The following tests should not change the value since the ++ original does NOT match. 
*/ ++ if (__sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll) != ++ 0x100000002ll) abort (); ++ if (__sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll) != ++ 0) abort (); ++ if (__sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll) != ++ 0) abort (); ++ ++ if (__sync_fetch_and_add (AL+8, 1) != 0) abort (); ++ if (__sync_fetch_and_add (AL+9, 0xb000e0000000ll) != 0x1000e0de0000ll) abort (); ++ if (__sync_fetch_and_sub (AL+10, 22) != 42) abort (); ++ if (__sync_fetch_and_sub (AL+11, 0xb000e0000000ll) != 0xc001c0de0000ll) ++ abort (); ++ ++ if (__sync_fetch_and_and (AL+12, 0x300000007ll) != -1ll) abort (); ++ if (__sync_fetch_and_or (AL+13, 0x500000009ll) != 0) abort (); ++ if (__sync_fetch_and_xor (AL+14, 0xe00000001ll) != 0xff00ff0000ll) abort (); ++ if (__sync_fetch_and_nand (AL+15, 0xa00000007ll) != -1ll) abort (); ++ ++ /* These should be the same as the fetch_and_* cases except for ++ return value. 
*/ ++ if (__sync_add_and_fetch (AL+16, 1) != 1) abort (); ++ if (__sync_add_and_fetch (AL+17, 0xb000e0000000ll) != 0xc001c0de0000ll) ++ abort (); ++ if (__sync_sub_and_fetch (AL+18, 22) != 20) abort (); ++ if (__sync_sub_and_fetch (AL+19, 0xb000e0000000ll) != 0x1000e0de0000ll) ++ abort (); ++ ++ if (__sync_and_and_fetch (AL+20, 0x300000007ll) != 0x300000007ll) abort (); ++ if (__sync_or_and_fetch (AL+21, 0x500000009ll) != 0x500000009ll) abort (); ++ if (__sync_xor_and_fetch (AL+22, 0xe00000001ll) != 0xf100ff0001ll) abort (); ++ if (__sync_nand_and_fetch (AL+23, 0xa00000007ll) != ~0xa00000007ll) abort (); ++} ++ ++int main () ++{ ++ memcpy (AL, init_di, sizeof (init_di)); ++ ++ do_noret_di (); ++ ++ if (memcmp (AL, test_di, sizeof (test_di))) ++ abort (); ++ ++ memcpy (AL, init_di, sizeof (init_di)); ++ ++ do_ret_di (); ++ ++ if (memcmp (AL, test_di, sizeof (test_di))) ++ abort (); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.dg/di-sync-multithread.c ++++ b/src/gcc/testsuite/gcc.dg/di-sync-multithread.c +@@ -0,0 +1,205 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sync_longlong } */ ++/* { dg-require-effective-target pthread_h } */ ++/* { dg-require-effective-target pthread } */ ++/* { dg-options "-pthread -std=gnu99" } */ ++ ++/* test of long long atomic ops performed in parallel in 3 pthreads ++ david.gilbert@linaro.org */ ++ ++#include ++#include ++ ++/*#define DEBUGIT 1 */ ++ ++#ifdef DEBUGIT ++#include ++ ++#define DOABORT(x,...) {\ ++ fprintf (stderr, x, __VA_ARGS__); fflush (stderr); abort ();\ ++ } ++ ++#else ++ ++#define DOABORT(x,...) abort (); ++ ++#endif ++ ++/* Passed to each thread to describe which bits it is going to work on. */ ++struct threadwork { ++ unsigned long long count; /* incremented each time the worker loops. */ ++ unsigned int thread; /* ID */ ++ unsigned int addlsb; /* 8 bit */ ++ unsigned int logic1lsb; /* 5 bit */ ++ unsigned int logic2lsb; /* 8 bit */ ++}; ++ ++/* The shared word where all the atomic work is done. 
*/ ++static volatile long long workspace; ++ ++/* A shared word to tell the workers to quit when non-0. */ ++static long long doquit; ++ ++extern void abort (void); ++ ++/* Note this test doesn't test the return values much. */ ++void* ++worker (void* data) ++{ ++ struct threadwork *tw = (struct threadwork*)data; ++ long long add1bit = 1ll << tw->addlsb; ++ long long logic1bit = 1ll << tw->logic1lsb; ++ long long logic2bit = 1ll << tw->logic2lsb; ++ ++ /* Clear the bits we use. */ ++ __sync_and_and_fetch (&workspace, ~(0xffll * add1bit)); ++ __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit)); ++ __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit)); ++ ++ do ++ { ++ long long tmp1, tmp2, tmp3; ++ /* OK, lets try and do some stuff to the workspace - by the end ++ of the main loop our area should be the same as it is now - i.e. 0. */ ++ ++ /* Push the arithmetic section upto 128 - one of the threads will ++ case this to carry accross the 32bit boundary. */ ++ for (tmp2 = 0; tmp2 < 64; tmp2++) ++ { ++ /* Add 2 using the two different adds. */ ++ tmp1 = __sync_add_and_fetch (&workspace, add1bit); ++ tmp3 = __sync_fetch_and_add (&workspace, add1bit); ++ ++ /* The value should be the intermediate add value in both cases. */ ++ if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff))) ++ DOABORT ("Mismatch of add intermediates on thread %d " ++ "workspace=0x%llx tmp1=0x%llx " ++ "tmp2=0x%llx tmp3=0x%llx\n", ++ tw->thread, workspace, tmp1, tmp2, tmp3); ++ } ++ ++ /* Set the logic bits. */ ++ tmp2=__sync_or_and_fetch (&workspace, ++ 0x1fll * logic1bit | 0xffll * logic2bit); ++ ++ /* Check the logic bits are set and the arithmetic value is correct. 
*/ ++ if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit + | 0xffll * add1bit)) + != (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit)) + DOABORT ("Midloop check failed on thread %d " @@ -24349,6 +24086,46 @@ + +/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */ +/* { dg-final { cleanup-rtl-dump "sms" } } */ +--- a/src/gcc/testsuite/gcc.dg/sms-11.c ++++ b/src/gcc/testsuite/gcc.dg/sms-11.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */ ++ ++extern void abort (void); ++ ++float out[4][4] = { 6, 6, 7, 5, 6, 7, 5, 5, 6, 4, 4, 4, 6, 2, 3, 4 }; ++ ++void ++invert (void) ++{ ++ int i, j, k = 0, swap; ++ float tmp[4][4] = { 5, 6, 7, 5, 6, 7, 5, 5, 4, 4, 4, 4, 3, 2, 3, 4 }; ++ ++ for (i = 0; i < 4; i++) ++ { ++ for (j = i + 1; j < 4; j++) ++ if (tmp[j][i] > tmp[i][i]) ++ swap = j; ++ ++ if (swap != i) ++ tmp[i][k] = tmp[swap][k]; ++ } ++ ++ for (i = 0; i < 4; i++) ++ for (j = 0; j < 4; j++) ++ if (tmp[i][j] != out[i][j]) ++ abort (); ++} ++ ++int ++main () ++{ ++ invert (); ++ return 0; ++} ++ ++/* { dg-final { cleanup-rtl-dump "sms" } } */ --- a/src/gcc/testsuite/gcc.dg/sms-9.c +++ b/src/gcc/testsuite/gcc.dg/sms-9.c @@ -0,0 +1,60 @@ @@ -24452,18 +24229,6 @@ +} + +/* { dg-final { scan-assembler "abort" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c -+++ b/src/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c -@@ -26,7 +26,7 @@ - } - } - --/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided_wide } } } */ --/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided_wide } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ -+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ - /* { dg-final { cleanup-tree-dump "vect" } } */ - --- 
a/src/gcc/testsuite/gcc.dg/vect/bb-slp-11.c +++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-11.c @@ -48,7 +48,6 @@ @@ -25425,8 +25190,8 @@ +/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c -+++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c +--- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c ++++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c @@ -53,6 +53,7 @@ } @@ -25436,8 +25201,8 @@ +/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c -+++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c +--- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c ++++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c @@ -53,6 +53,7 @@ } @@ -25467,6 +25232,18 @@ +/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c ++++ b/src/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c +@@ -26,7 +26,7 @@ + } + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + --- a/src/gcc/testsuite/gcc.dg/vect/pr30843.c +++ b/src/gcc/testsuite/gcc.dg/vect/pr30843.c @@ -20,6 +20,6 
@@ @@ -25527,122 +25304,65 @@ + +/* { dg-final { cleanup-tree-dump "vect" } } */ + ---- a/src/gcc/testsuite/gcc.dg/vect/slp-11.c -+++ b/src/gcc/testsuite/gcc.dg/vect/slp-11.c -@@ -1,113 +0,0 @@ --/* { dg-require-effective-target vect_int } */ -- --#include --#include "tree-vect.h" -- --#define N 8 -- --int --main1 () --{ -- int i; -- unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; -- unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; -- float out2[N*8]; -- -- /* Different operations - not SLPable. */ -- for (i = 0; i < N; i++) -- { -- a0 = in[i*8] + 5; -- a1 = in[i*8 + 1] * 6; -- a2 = in[i*8 + 2] + 7; -- a3 = in[i*8 + 3] + 8; -- a4 = in[i*8 + 4] + 9; -- a5 = in[i*8 + 5] + 10; -- a6 = in[i*8 + 6] + 11; -- a7 = in[i*8 + 7] + 12; -- -- b0 = a0 * 3; -- b1 = a1 * 2; -- b2 = a2 * 12; -- b3 = a3 * 5; -- b4 = a4 * 8; -- b5 = a5 * 4; -- b6 = a6 * 3; -- b7 = a7 * 2; -- -- out[i*8] = b0 - 2; -- out[i*8 + 1] = b1 - 3; -- out[i*8 + 2] = b2 - 2; -- out[i*8 + 3] = b3 - 1; -- out[i*8 + 4] = b4 - 8; -- out[i*8 + 5] = b5 - 7; -- out[i*8 + 6] = b6 - 3; -- out[i*8 + 7] = b7 - 7; -- } -- -- /* check results: */ -- for (i = 0; i < N; i++) -- { -- if (out[i*8] != (in[i*8] + 5) * 3 - 2 -- || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3 -- || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2 -- || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1 -- || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8 -- || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7 -- || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3 -- || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7) -- abort (); -- } -- -- /* Requires permutation - not SLPable. 
*/ -- for (i = 0; i < N*2; i++) -- { -- out[i*4] = (in[i*4] + 2) * 3; -- out[i*4 + 1] = (in[i*4 + 2] + 2) * 7; -- out[i*4 + 2] = (in[i*4 + 1] + 7) * 3; -- out[i*4 + 3] = (in[i*4 + 3] + 3) * 4; -- } -- -- /* check results: */ -- for (i = 0; i < N*2; i++) -- { -- if (out[i*4] != (in[i*4] + 2) * 3 -- || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7 -- || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3 -- || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4) -- abort (); -- } -- -- /* Different operations - not SLPable. */ -- for (i = 0; i < N*4; i++) -- { -- out2[i*2] = ((float) in[i*2] * 2 + 6) ; -- out2[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7); -- } -- -- /* check results: */ -- for (i = 0; i < N*4; i++) -- { -- if (out2[i*2] != ((float) in[i*2] * 2 + 6) -- || out2[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7)) -- abort (); -- } -- -- -- return 0; --} -- --int main (void) --{ -- check_vect (); -- -- main1 (); -- -- return 0; --} -- --/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided_wide } && vect_int_mult } } } } */ --/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { { ! vect_uintfloat_cvt } && vect_strided_wide } && vect_int_mult } } } } */ --/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! 
{ vect_int_mult && vect_strided_wide } } } } } */ --/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ --/* { dg-final { cleanup-tree-dump "vect" } } */ -- +--- a/src/gcc/testsuite/gcc.dg/vect/pr51301.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr51301.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++typedef signed char int8_t; ++typedef signed long long int64_t; ++int64_t ++f0a (int8_t * __restrict__ arg1) ++{ ++ int idx; ++ int64_t result = 0; ++ for (idx = 0; idx < 416; idx += 1) ++ result += arg1[idx] << (arg1[idx] == arg1[idx]); ++ return result; ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/pr51799.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr51799.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++ ++typedef signed char int8_t; ++typedef unsigned char uint8_t; ++typedef signed short int16_t; ++typedef unsigned long uint32_t; ++void ++f0a (uint32_t * __restrict__ result, int8_t * __restrict__ arg1, ++ uint32_t * __restrict__ arg4, int8_t temp_6) ++{ ++ int idx; ++ for (idx = 0; idx < 416; idx += 1) ++ { ++ result[idx] = (uint8_t)(((arg1[idx] << 7) + arg4[idx]) * temp_6); ++ } ++} ++ ++/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/pr52870.c ++++ b/src/gcc/testsuite/gcc.dg/vect/pr52870.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -ftree-vectorize" } */ ++ ++long ++test (int *x) ++{ ++ unsigned long sx, xprec; ++ ++ sx = *x >= 0 ? *x : -*x; ++ ++ xprec = sx * 64; ++ ++ if (sx < 16384) ++ foo (sx); ++ ++ return xprec; ++} --- a/src/gcc/testsuite/gcc.dg/vect/slp-11a.c +++ b/src/gcc/testsuite/gcc.dg/vect/slp-11a.c @@ -0,0 +1,75 @@ @@ -25773,6 +25493,122 @@ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! 
{ vect_strided4 && vect_int_mult } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/slp-11.c ++++ b/src/gcc/testsuite/gcc.dg/vect/slp-11.c +@@ -1,113 +0,0 @@ +-/* { dg-require-effective-target vect_int } */ +- +-#include +-#include "tree-vect.h" +- +-#define N 8 +- +-int +-main1 () +-{ +- int i; +- unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7; +- unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +- float out2[N*8]; +- +- /* Different operations - not SLPable. */ +- for (i = 0; i < N; i++) +- { +- a0 = in[i*8] + 5; +- a1 = in[i*8 + 1] * 6; +- a2 = in[i*8 + 2] + 7; +- a3 = in[i*8 + 3] + 8; +- a4 = in[i*8 + 4] + 9; +- a5 = in[i*8 + 5] + 10; +- a6 = in[i*8 + 6] + 11; +- a7 = in[i*8 + 7] + 12; +- +- b0 = a0 * 3; +- b1 = a1 * 2; +- b2 = a2 * 12; +- b3 = a3 * 5; +- b4 = a4 * 8; +- b5 = a5 * 4; +- b6 = a6 * 3; +- b7 = a7 * 2; +- +- out[i*8] = b0 - 2; +- out[i*8 + 1] = b1 - 3; +- out[i*8 + 2] = b2 - 2; +- out[i*8 + 3] = b3 - 1; +- out[i*8 + 4] = b4 - 8; +- out[i*8 + 5] = b5 - 7; +- out[i*8 + 6] = b6 - 3; +- out[i*8 + 7] = b7 - 7; +- } +- +- /* check results: */ +- for (i = 0; i < N; i++) +- { +- if (out[i*8] != (in[i*8] + 5) * 3 - 2 +- || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3 +- || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2 +- || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1 +- || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8 +- || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7 +- || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3 +- || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7) +- abort (); +- } +- +- /* Requires permutation - not SLPable. 
*/ +- for (i = 0; i < N*2; i++) +- { +- out[i*4] = (in[i*4] + 2) * 3; +- out[i*4 + 1] = (in[i*4 + 2] + 2) * 7; +- out[i*4 + 2] = (in[i*4 + 1] + 7) * 3; +- out[i*4 + 3] = (in[i*4 + 3] + 3) * 4; +- } +- +- /* check results: */ +- for (i = 0; i < N*2; i++) +- { +- if (out[i*4] != (in[i*4] + 2) * 3 +- || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7 +- || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3 +- || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4) +- abort (); +- } +- +- /* Different operations - not SLPable. */ +- for (i = 0; i < N*4; i++) +- { +- out2[i*2] = ((float) in[i*2] * 2 + 6) ; +- out2[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7); +- } +- +- /* check results: */ +- for (i = 0; i < N*4; i++) +- { +- if (out2[i*2] != ((float) in[i*2] * 2 + 6) +- || out2[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7)) +- abort (); +- } +- +- +- return 0; +-} +- +-int main (void) +-{ +- check_vect (); +- +- main1 (); +- +- return 0; +-} +- +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided_wide } && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { { ! vect_uintfloat_cvt } && vect_strided_wide } && vect_int_mult } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! 
{ vect_int_mult && vect_strided_wide } } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ +-/* { dg-final { cleanup-tree-dump "vect" } } */ +- --- a/src/gcc/testsuite/gcc.dg/vect/slp-11c.c +++ b/src/gcc/testsuite/gcc.dg/vect/slp-11c.c @@ -0,0 +1,46 @@ @@ -25961,6 +25797,131 @@ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided8 } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/slp-19a.c ++++ b/src/gcc/testsuite/gcc.dg/vect/slp-19a.c +@@ -0,0 +1,61 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include ++#include "tree-vect.h" ++ ++#define N 16 ++ ++int ++main1 () ++{ ++ unsigned int i; ++ unsigned int out[N*8]; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2]; ++ ++ for (i = 0; i < N; i++) ++ { ++ out[i*8] = in[i*8]; ++ out[i*8 + 1] = in[i*8 + 1]; ++ out[i*8 + 2] = in[i*8 + 2]; ++ out[i*8 + 3] = in[i*8 + 3]; ++ out[i*8 + 4] = in[i*8 + 4]; ++ out[i*8 + 5] = in[i*8 + 5]; ++ out[i*8 + 6] = in[i*8 + 6]; ++ out[i*8 + 7] = in[i*8 + 7]; ++ ++ ia[i] = in[i*8 + 2]; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N; i++) ++ { ++ if (out[i*8] != in[i*8] ++ || out[i*8 + 1] != in[i*8 + 1] ++ || out[i*8 + 2] != in[i*8 + 2] ++ || out[i*8 + 3] != in[i*8 + 3] ++ || out[i*8 + 4] != in[i*8 + 4] ++ || out[i*8 + 5] != in[i*8 + 5] ++ || out[i*8 + 6] != in[i*8 + 6] ++ || out[i*8 + 7] != in[i*8 + 7] ++ || ia[i] != in[i*8 + 2]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { 
scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided8 } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided8 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided8} } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/slp-19b.c ++++ b/src/gcc/testsuite/gcc.dg/vect/slp-19b.c +@@ -0,0 +1,58 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include ++#include "tree-vect.h" ++ ++#define N 16 ++ ++int ++main1 () ++{ ++ unsigned int i; ++ unsigned int out[N*8]; ++ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; ++ unsigned int ia[N*2], a0, a1, a2, a3; ++ ++ for (i = 0; i < N*2; i++) ++ { ++ a0 = in[i*4] + 1; ++ a1 = in[i*4 + 1] + 2; ++ a2 = in[i*4 + 2] + 3; ++ a3 = in[i*4 + 3] + 4; ++ ++ out[i*4] = a0; ++ out[i*4 + 1] = a1; ++ out[i*4 + 2] = a2; ++ out[i*4 + 3] = a3; ++ ++ ia[i] = a2; ++ } ++ ++ /* check results: */ ++ for (i = 0; i < N*2; i++) ++ { ++ if (out[i*4] != in[i*4] + 1 ++ || out[i*4 + 1] != in[i*4 + 1] + 2 ++ || out[i*4 + 2] != in[i*4 + 2] + 3 ++ || out[i*4 + 3] != in[i*4 + 3] + 4 ++ || ia[i] != in[i*4 + 2] + 3) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ check_vect (); ++ ++ main1 (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! 
vect_strided4 } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.dg/vect/slp-19.c +++ b/src/gcc/testsuite/gcc.dg/vect/slp-19.c @@ -1,154 +0,0 @@ @@ -26118,131 +26079,6 @@ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! { vect_strided_wide } } } } } */ -/* { dg-final { cleanup-tree-dump "vect" } } */ - ---- a/src/gcc/testsuite/gcc.dg/vect/slp-19a.c -+++ b/src/gcc/testsuite/gcc.dg/vect/slp-19a.c -@@ -0,0 +1,61 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#include -+#include "tree-vect.h" -+ -+#define N 16 -+ -+int -+main1 () -+{ -+ unsigned int i; -+ unsigned int out[N*8]; -+ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; -+ unsigned int ia[N*2]; -+ -+ for (i = 0; i < N; i++) -+ { -+ out[i*8] = in[i*8]; -+ out[i*8 + 1] = in[i*8 + 1]; -+ out[i*8 + 2] = in[i*8 + 2]; -+ out[i*8 + 3] = in[i*8 + 3]; -+ out[i*8 + 4] = in[i*8 + 4]; -+ out[i*8 + 5] = in[i*8 + 5]; -+ out[i*8 + 6] = in[i*8 + 6]; -+ out[i*8 + 7] = in[i*8 + 7]; -+ -+ ia[i] = in[i*8 + 2]; -+ } -+ -+ /* check results: */ -+ for (i = 0; i < N; i++) -+ { -+ if (out[i*8] != in[i*8] -+ || out[i*8 + 1] != in[i*8 + 1] -+ || out[i*8 + 2] != in[i*8 + 2] -+ || out[i*8 + 3] != in[i*8 + 3] -+ || out[i*8 + 4] != in[i*8 + 4] -+ || out[i*8 + 5] != in[i*8 + 5] -+ || out[i*8 + 6] != in[i*8 + 6] -+ || out[i*8 + 7] != in[i*8 + 7] -+ || ia[i] != in[i*8 + 2]) -+ abort (); -+ } -+ -+ 
return 0; -+} -+ -+int main (void) -+{ -+ check_vect (); -+ -+ main1 (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided8 } } } } */ -+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided8 } } } */ -+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided8} } } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/slp-19b.c -+++ b/src/gcc/testsuite/gcc.dg/vect/slp-19b.c -@@ -0,0 +1,58 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#include -+#include "tree-vect.h" -+ -+#define N 16 -+ -+int -+main1 () -+{ -+ unsigned int i; -+ unsigned int out[N*8]; -+ unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; -+ unsigned int ia[N*2], a0, a1, a2, a3; -+ -+ for (i = 0; i < N*2; i++) -+ { -+ a0 = in[i*4] + 1; -+ a1 = in[i*4 + 1] + 2; -+ a2 = in[i*4 + 2] + 3; -+ a3 = in[i*4 + 3] + 4; -+ -+ out[i*4] = a0; -+ out[i*4 + 1] = a1; -+ out[i*4 + 2] = a2; -+ out[i*4 + 3] = a3; -+ -+ ia[i] = a2; -+ } -+ -+ /* check results: */ -+ for (i = 0; i < N*2; i++) -+ { -+ if (out[i*4] != in[i*4] + 1 -+ || out[i*4 + 1] != in[i*4 + 1] + 2 -+ || out[i*4 + 2] != in[i*4 + 2] + 3 -+ || out[i*4 + 3] != in[i*4 + 3] + 4 -+ || ia[i] != in[i*4 + 2] + 3) -+ abort (); -+ } -+ -+ return 0; -+} -+ -+int main (void) -+{ -+ check_vect (); -+ -+ main1 (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } 
*/ -+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided4 } } } } */ -+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ -+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.dg/vect/slp-19c.c +++ b/src/gcc/testsuite/gcc.dg/vect/slp-19c.c @@ -0,0 +1,95 @@ @@ -26608,26 +26444,6 @@ +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + ---- a/src/gcc/testsuite/gcc.dg/vect/vect-1.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-1.c -@@ -85,6 +85,6 @@ - fbar (a); - } - --/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */ --/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_strided2 } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_strided2 } } } */ - /* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-10.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-10.c -@@ -22,5 +22,5 @@ - return 0; - } - --/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_extract_even_odd } } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! 
vect_strided2 } } } } */ - /* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.dg/vect/vect-104.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-104.c @@ -64,6 +64,7 @@ @@ -26658,6 +26474,15 @@ #include #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/vect-10.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-10.c +@@ -22,5 +22,5 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_strided2 } } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.dg/vect/vect-119.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-119.c @@ -0,0 +1,28 @@ @@ -26689,6 +26514,17 @@ + +/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-1.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-1.c +@@ -85,6 +85,6 @@ + fbar (a); + } + +-/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_strided2 } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.dg/vect/vect-40.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-40.c @@ -1,4 +1,5 @@ @@ -26991,41 +26827,105 @@ + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c -@@ -1,5 +1,4 @@ - /* { dg-require-effective-target vect_int } */ --/* { dg-add-options 
quad_vectors } */ - - #include - #include "tree-vect.h" ---- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c -@@ -1,4 +1,5 @@ - /* { dg-require-effective-target vect_int } */ -+/* { dg-add-options double_vectors } */ - - #include - #include "tree-vect.h" ---- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c -@@ -1,5 +1,4 @@ - /* { dg-require-effective-target vect_int } */ --/* { dg-add-options quad_vectors } */ - - #include - #include "tree-vect.h" ---- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c -@@ -1,4 +1,5 @@ - /* { dg-require-effective-target vect_int } */ -+/* { dg-add-options double_vectors } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect.exp ++++ b/src/gcc/testsuite/gcc.dg/vect/vect.exp +@@ -75,15 +75,20 @@ + lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details" - #include - #include "tree-vect.h" ---- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1.c -@@ -22,5 +22,6 @@ + # Main loop. 
+-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \ +- "" $DEFAULT_VECTCFLAGS +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \ +- "" $VECT_SLP_CFLAGS +- ++set VECT_ADDITIONAL_FLAGS [list ""] ++if { [check_effective_target_lto] } { ++ lappend VECT_ADDITIONAL_FLAGS "-flto" ++} ++foreach flags $VECT_ADDITIONAL_FLAGS { ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \ ++ $flags $DEFAULT_VECTCFLAGS ++ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \ ++ $flags $VECT_SLP_CFLAGS ++} + + #### Tests with special options + global SAVED_DEFAULT_VECTCFLAGS +@@ -210,6 +215,12 @@ + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/ggc-*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS + ++# -ftree-loop-if-convert-stores ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-ftree-loop-if-convert-stores" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/if-cvt-stores-vect-*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ + # With -O3. + # Don't allow IPA cloning, because it throws our counts out of whack. 
+ set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS +@@ -234,6 +245,18 @@ + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]] \ + "" $VECT_SLP_CFLAGS + ++# -fno-tree-fre ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ ++# -fno-tree-fre -fno-tree-pre ++set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS ++lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" "-fno-tree-pre" ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fre-pre*.\[cS\]]] \ ++ "" $DEFAULT_VECTCFLAGS ++ + # Clean up. + set dg-do-what-default ${save-dg-do-what-default} + +--- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include + #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include + #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c +@@ -1,5 +1,4 @@ + /* { dg-require-effective-target vect_int } */ +-/* { dg-add-options quad_vectors } */ + + #include + #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c +@@ -1,4 +1,5 @@ + /* { dg-require-effective-target vect_int } */ ++/* { dg-add-options double_vectors } */ + + #include + #include "tree-vect.h" +--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c +@@ -20,5 +20,6 @@ } /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ @@ -27033,9 
+26933,9 @@ +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c -@@ -20,5 +20,6 @@ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c +@@ -22,5 +22,6 @@ } /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ @@ -27043,8 +26943,8 @@ +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c +--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1.c @@ -22,5 +22,6 @@ } @@ -27598,8 +27498,8 @@ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_strided2 } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c +--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c @@ -71,6 +71,6 @@ return 0; } @@ -27607,9 +27507,9 @@ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ - ---- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c -+++ 
b/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c + +--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c @@ -71,6 +71,6 @@ return 0; } @@ -27617,7 +27517,7 @@ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ - + --- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c @@ -72,5 +72,5 @@ @@ -27827,6 +27727,16 @@ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c +@@ -54,6 +54,6 @@ + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ + --- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c @@ -71,6 +71,6 @@ @@ -27837,16 +27747,16 @@ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c -@@ -54,6 +54,6 @@ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c +@@ -85,6 +85,6 @@ return 0; } --/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ 
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ - + --- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c @@ -78,6 +78,6 @@ @@ -27877,16 +27787,6 @@ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c -@@ -85,6 +85,6 @@ - return 0; - } - --/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ - /* { dg-final { cleanup-tree-dump "vect" } } */ - --- a/src/gcc/testsuite/gcc.dg/vect/vect-vfa-03.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-vfa-03.c @@ -53,6 +53,6 @@ @@ -28041,9 +27941,9 @@ +/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + ---- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c -@@ -0,0 +1,59 @@ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +@@ -0,0 +1,49 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" @@ -28051,26 +27951,22 @@ + +#define N 32 +#define COEF 32470 ++#define COEF2 324700 + +unsigned char in[N]; +int out[N]; ++int out2[N]; + +__attribute__ ((noinline)) void -+foo () -+{ -+ int 
i; -+ -+ for (i = 0; i < N; i++) -+ out[i] = in[i] * COEF; -+} -+ -+__attribute__ ((noinline)) void -+bar () ++foo (int a) +{ + int i; + + for (i = 0; i < N; i++) -+ out[i] = COEF * in[i]; ++ { ++ out[i] = in[i] * COEF; ++ out2[i] = in[i] + a; ++ } +} + +int main (void) @@ -28083,29 +27979,23 @@ + __asm__ volatile (""); + } + -+ foo (); -+ -+ for (i = 0; i < N; i++) -+ if (out[i] != in[i] * COEF) -+ abort (); -+ -+ bar (); ++ foo (COEF2); + + for (i = 0; i < N; i++) -+ if (out[i] != in[i] * COEF) ++ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) + abort (); + + return 0; +} + -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ -+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ -+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + ---- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c -@@ -0,0 +1,49 @@ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +@@ -0,0 +1,59 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" @@ -28113,22 +28003,26 @@ + +#define N 32 +#define COEF 32470 -+#define COEF2 324700 + +unsigned char in[N]; +int out[N]; -+int out2[N]; + +__attribute__ ((noinline)) void -+foo (int a) ++foo () +{ + int i; + + for (i = 0; i < N; i++) -+ { -+ out[i] = 
in[i] * COEF; -+ out2[i] = in[i] + a; -+ } ++ out[i] = in[i] * COEF; ++} ++ ++__attribute__ ((noinline)) void ++bar () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ out[i] = COEF * in[i]; +} + +int main (void) @@ -28141,18 +28035,24 @@ + __asm__ volatile (""); + } + -+ foo (COEF2); ++ foo (); + + for (i = 0; i < N; i++) -+ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2) ++ if (out[i] != in[i] * COEF) ++ abort (); ++ ++ bar (); ++ ++ for (i = 0; i < N; i++) ++ if (out[i] != in[i] * COEF) + abort (); + + return 0; +} + -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ -+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ -+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ ++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + --- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c @@ -28447,7 +28347,7 @@ + --- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c -@@ -0,0 +1,65 @@ +@@ -0,0 +1,64 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_shift } */ + @@ -28509,74 +28409,501 @@ + return 0; +} + -+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */ ++/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */ +/* { dg-final { scan-tree-dump-times 
"vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/abitest.h ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/abitest.h +@@ -1,3 +1,4 @@ ++ + #define IN_FRAMEWORK + + #ifdef VFP +@@ -10,6 +11,13 @@ + #define D6 48 + #define D7 56 + ++#ifdef NEON ++#define Q0 D0 ++#define Q1 D2 ++#define Q2 D4 ++#define Q3 D6 ++#endif + ---- a/src/gcc/testsuite/gcc.dg/vect/vect.exp -+++ b/src/gcc/testsuite/gcc.dg/vect/vect.exp -@@ -75,15 +75,20 @@ - lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details" - - # Main loop. --dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \ -- "" $DEFAULT_VECTCFLAGS --dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \ -- "" $DEFAULT_VECTCFLAGS --dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \ -- "" $DEFAULT_VECTCFLAGS --dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \ -- "" $VECT_SLP_CFLAGS + #define S0 64 + #define S1 68 + #define S2 72 +@@ -27,23 +35,18 @@ + #define S14 120 + #define S15 124 + +-#define R0 128 +-#define R1 132 +-#define R2 136 +-#define R3 140 - -+set VECT_ADDITIONAL_FLAGS [list ""] -+if { [check_effective_target_lto] } { -+ lappend VECT_ADDITIONAL_FLAGS "-flto" -+} -+foreach flags $VECT_ADDITIONAL_FLAGS { -+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \ -+ $flags $DEFAULT_VECTCFLAGS -+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \ -+ $flags $DEFAULT_VECTCFLAGS -+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]] \ -+ $flags $DEFAULT_VECTCFLAGS -+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]] \ -+ $flags $VECT_SLP_CFLAGS -+} - - #### Tests with special options - global SAVED_DEFAULT_VECTCFLAGS -@@ -210,6 +215,12 @@ - dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/ggc-*.\[cS\]]] \ - "" $DEFAULT_VECTCFLAGS +-#define STACK 144 +- ++#define CORE_REG_START 128 + #else ++#define CORE_REG_START 0 
++#endif -+# -ftree-loop-if-convert-stores -+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS -+lappend DEFAULT_VECTCFLAGS "-ftree-loop-if-convert-stores" -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/if-cvt-stores-vect-*.\[cS\]]] \ -+ "" $DEFAULT_VECTCFLAGS +-#define R0 0 +-#define R1 4 +-#define R2 8 +-#define R3 12 ++#define R0 CORE_REG_START ++#define R1 (R0 + 4) ++#define R2 (R1 + 4) ++#define R3 (R2 + 4) ++#define STACK (R3 + 4) + +-#define STACK 16 + +-#endif + + extern void abort (void); + +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-constants.h ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-constants.h +@@ -0,0 +1,33 @@ + - # With -O3. - # Don't allow IPA cloning, because it throws our counts out of whack. - set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS -@@ -234,6 +245,18 @@ - dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]] \ - "" $VECT_SLP_CFLAGS - -+# -fno-tree-fre -+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS -+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]] \ -+ "" $DEFAULT_VECTCFLAGS + -+# -fno-tree-fre -fno-tree-pre -+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS -+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" "-fno-tree-pre" -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fre-pre*.\[cS\]]] \ -+ "" $DEFAULT_VECTCFLAGS ++#include "arm_neon.h" + - # Clean up. 
- set dg-do-what-default ${save-dg-do-what-default} ++const int32x4_t i32x4_constvec1 = { 1101, 1102, 1103, 1104}; ++const int32x4_t i32x4_constvec2 = { 2101, 2102, 2103, 2104}; ++ ++#define ELEM(INDEX) .val[INDEX] ++ ++const int32x4x2_t i32x4x2_constvec1 = {ELEM(0) = {0xaddebccb,11,12,13}, ++ ELEM(1) = {14, 15, 16, 17} }; ++ ++const int32x4x2_t i32x4x2_constvec2 = { ELEM(0) = {0xaadebcca,11,12,13}, ++ ELEM(1) = {140, 15, 16, 17}}; ++ ++const int32x4x3_t i32x4x3_constvec1 = { ELEM(0) = {0xabbccdde,8, 9, 10}, ++ ELEM(1) = {0xabcccdde, 26, 27, 28}, ++ ELEM(2) = {0xaccccddf, 29, 30, 31}}; ++ ++const int32x4x3_t i32x4x3_constvec2 = { ELEM(0) = {0xbccccdd0,8, 9, 10}, ++ ELEM(1) = {0xbdfe1000, 26, 27, 28}, ++ ELEM(2) = {0xaccccddf, 29, 30, 31}}; ++const float32x4x2_t f32x4x2_constvec1 = ++ { ELEM(0) = { 7.101f, 0.201f, 0.301f, 0.401f} , ++ ELEM(1) = { 8.101f, 0.501f, 0.601f, 0.701f} }; ++ ++const float32x4x2_t f32x4x2_constvec2 = ++ { ELEM(0) = { 11.99f , 11.21f, 1.27f, 8.74f}, ++ ELEM(1) = { 13.45f , 1.23f ,1.24f, 1.26f}}; ++ ++const int32x2_t i32x2_constvec1 = { 1283, 1345 }; ++const int32x2x2_t i32x2x2_constvec1 = { ELEM(0) = { 0xabcdefab, 32 }, ++ ELEM(1) = { 0xabcdefbc, 33 }}; +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect1.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect1.c +@@ -0,0 +1,27 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect1.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(float, 3.0f, S4) /* D2, Q1 */ ++ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */ ++ARG(double, 12.0, D3) /* Backfill this particular 
argument. */ ++ARG(int32x4x2_t, i32x4x2_constvec2, STACK) ++ARG(float, 5.0f, STACK+sizeof(int32x4x2_t)) /* No backfill allowed. */ ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect2.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect2.c +@@ -0,0 +1,23 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect2.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1. */ ++ARG(float, 3.0f, S4) /* D2, Q1 occupied. */ ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect3.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect3.c +@@ -0,0 +1,26 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect3.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(float, 3.0f, S4) /* D2, Q1 */ ++ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */ ++ARG(int32x4x2_t, i32x4x2_constvec2, STACK) ++ARG(double, 11.0, STACK+sizeof(int32x4x2_t)) /* No backfill in D3. 
*/ ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect4.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect4.c +@@ -0,0 +1,27 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect4.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(float, 3.0f, S4) /* D2, Q1 */ ++ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */ ++ARG(double, 12.0, D3) /* Backfill this particular argument. */ ++ARG(float, 5.0f, S5) /* Backfill in S5. */ ++ARG(int32x4x2_t, i32x4x2_constvec2, STACK) ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect5.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect5.c +@@ -0,0 +1,28 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect5.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(float, 3.0f, S4) /* D2, Q1 */ ++ARG(float32x4x2_t, f32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */ ++ARG(double, 12.0, D3) /* Backfill this particular argument. */ ++ARG(int32x4x2_t, i32x4x2_constvec2, STACK) ++ARG(float, 5.0f, STACK+sizeof(int32x4x2_t)) /* No backfill allowed. 
*/ ++LAST_ARG(int, 3, R0) ++ ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect6.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect6.c +@@ -0,0 +1,24 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect6.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */ ++ARG(int32x4x3_t, i32x4x3_constvec1, Q1) /* Q1, Q2, Q3 */ ++ARG(int32x4x3_t, i32x4x3_constvec2, STACK) ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect7.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect7.c +@@ -0,0 +1,27 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect7.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(float, 24.3f, S0) /* S0 , D0, Q0 */ ++ARG(int32x4x3_t, i32x4x3_constvec1, Q1) /* Q1, Q2, Q3 */ ++ARG(double, 25.6, D1) ++ARG(float, 12.67f, S1) ++ARG(int32x4x3_t, i32x4x3_constvec2, STACK) ++ARG(double, 2.47, STACK+sizeof(int32x4x3_t)) ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect8.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect8.c +@@ -0,0 +1,27 @@ ++/* Test AAPCS layout (VFP variant for Neon types) */ ++ ++/* { dg-do run { target arm*-*-*eabi* } } */ ++/* { dg-require-effective-target arm_hard_vfp_ok } */ ++/* { 
dg-require-effective-target arm_neon_ok } */ ++/* { dg-require-effective-target arm32 } */ ++/* { dg-add-options arm_neon } */ ++ ++ ++#ifndef IN_FRAMEWORK ++#define VFP ++#define NEON ++#define TESTFILE "neon-vect8.c" ++#include "neon-constants.h" ++ ++ ++#include "abitest.h" ++#else ++ ++ARG(float, 24.3f, S0) /* S0 , D0, Q0 */ ++ARG(int32x2_t, i32x2_constvec1, D1) /* D1 */ ++ARG(double, 25.6, D2) ++ARG(float, 12.67f, S1) ++ARG(int32x4x3_t, i32x4x3_constvec2, STACK) ++ARG(double, 2.47, STACK+sizeof(int32x4x3_t)) ++LAST_ARG(int, 3, R0) ++#endif +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp10.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp10.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp11.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp11.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp12.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp12.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp13.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp13.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target 
arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp14.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp14.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp15.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp15.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp16.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp17.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp17.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp1.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp1.c 
+@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp2.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp2.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp3.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp3.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp4.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp4.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp5.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp5.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- 
a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp6.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp6.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp7.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp7.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp8.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ +--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp9.c ++++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp9.c +@@ -1,6 +1,6 @@ + /* Test AAPCS layout (VFP variant) */ + +-/* { dg-do run { target arm*-*-eabi* } } */ ++/* { dg-do run { target arm*-*-*eabi* } } */ + /* { dg-require-effective-target arm_hard_vfp_ok } */ + /* { dg-require-effective-target arm32 } */ + /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */ --- a/src/gcc/testsuite/gcc.target/arm/cmp-1.c +++ b/src/gcc/testsuite/gcc.target/arm/cmp-1.c @@ -0,0 +1,37 @@ @@ -28701,11 +29028,27 @@ +#include "../../gcc.dg/di-longlong64-sync-1.c" + +/* We should be using ldrexd, strexd and no helpers or shorter ldrex. 
*/ -+/* { dg-final { scan-assembler-times "\tldrexd" 46 } } */ -+/* { dg-final { scan-assembler-times "\tstrexd" 46 } } */ ++/* { dg-final { scan-assembler-times "\tldrexd" 48 } } */ ++/* { dg-final { scan-assembler-times "\tstrexd" 48 } } */ +/* { dg-final { scan-assembler-not "__sync_" } } */ +/* { dg-final { scan-assembler-not "ldrex\t" } } */ +/* { dg-final { scan-assembler-not "strex\t" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/lp1013209.c ++++ b/src/gcc/testsuite/gcc.target/arm/lp1013209.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++#include "arm_neon.h" ++ ++void foo (void) ++{ ++ int16_t buffer[2048]; ++ int f; ++ for (f = 0; f < 128; f += 8) ++ vst1q_u16 (&buffer[f], (uint16x8_t){0}); ++ ++} --- a/src/gcc/testsuite/gcc.target/arm/mla-2.c +++ b/src/gcc/testsuite/gcc.target/arm/mla-2.c @@ -0,0 +1,9 @@ @@ -28718,73 +29061,298 @@ +} + +/* { dg-final { scan-assembler "smlalbb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c @@ -15,5 +15,5 @@ - out_float32x4_t = vld1q_dup_f32 (0); + out_float32x2_t = vld1_dup_f32 (0); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c @@ -15,5 +15,5 @@ - out_poly16x8_t = vld1q_dup_p16 (0); + out_poly16x4_t = vld1_dup_p16 (0); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c @@ -15,5 +15,5 @@ - out_poly8x16_t = vld1q_dup_p8 (0); + out_poly8x8_t = vld1_dup_p8 (0); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c @@ -15,5 +15,5 @@ - out_int16x8_t = vld1q_dup_s16 (0); + out_int16x4_t = vld1_dup_s16 (0); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c ++++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c @@ -15,5 +15,5 @@ - out_int32x4_t = vld1q_dup_s32 (0); + out_int32x2_t = vld1_dup_s32 (0); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c @@ -15,5 +15,5 @@ - out_int64x2_t = vld1q_dup_s64 (0); + out_int64x1_t = vld1_dup_s64 (0); } --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c @@ -15,5 +15,5 @@ - out_int8x16_t = vld1q_dup_s8 (0); + out_int8x8_t = vld1_dup_s8 (0); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c @@ -15,5 +15,5 @@ - out_uint16x8_t = vld1q_dup_u16 (0); + out_uint16x4_t = vld1_dup_u16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c +@@ -15,5 +15,5 @@ + out_uint32x2_t = vld1_dup_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c +@@ -15,5 +15,5 @@ + out_uint64x1_t = vld1_dup_u64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c +@@ -15,5 +15,5 @@ + out_uint8x8_t = vld1_dup_u8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1f32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1f32.c +@@ -15,5 +15,5 @@ + out_float32x2_t = vld1_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c +@@ -16,5 +16,5 @@ + out_float32x2_t = vld1_lane_f32 (0, arg1_float32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c +@@ -16,5 +16,5 @@ + out_poly16x4_t = vld1_lane_p16 (0, arg1_poly16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c +@@ -16,5 +16,5 @@ + out_poly8x8_t = vld1_lane_p8 (0, arg1_poly8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c +@@ -16,5 +16,5 @@ + out_int16x4_t = vld1_lane_s16 (0, arg1_int16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c +@@ -16,5 +16,5 @@ + out_int32x2_t = vld1_lane_s32 (0, arg1_int32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c +@@ -16,5 +16,5 @@ + out_int64x1_t = vld1_lane_s64 (0, arg1_int64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* 
{ dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c +@@ -16,5 +16,5 @@ + out_int8x8_t = vld1_lane_s8 (0, arg1_int8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c +@@ -16,5 +16,5 @@ + out_uint16x4_t = vld1_lane_u16 (0, arg1_uint16x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c +@@ -16,5 +16,5 @@ + out_uint32x2_t = vld1_lane_u32 (0, arg1_uint32x2_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c 
++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c +@@ -16,5 +16,5 @@ + out_uint64x1_t = vld1_lane_u64 (0, arg1_uint64x1_t, 0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c +@@ -16,5 +16,5 @@ + out_uint8x8_t = vld1_lane_u8 (0, arg1_uint8x8_t, 1); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1p16.c +@@ -15,5 +15,5 @@ + out_poly16x4_t = vld1_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1p8.c +@@ -15,5 +15,5 @@ + out_poly8x8_t = vld1_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ 
++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c +@@ -15,5 +15,5 @@ + out_float32x4_t = vld1q_dup_f32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c +@@ -15,5 +15,5 @@ + out_poly16x8_t = vld1q_dup_p16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c +@@ -15,5 +15,5 @@ + out_poly8x16_t = vld1q_dup_p8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler 
"vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c +@@ -15,5 +15,5 @@ + out_int16x8_t = vld1q_dup_s16 (0); + } + +-/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c +@@ -15,5 +15,5 @@ + out_int32x4_t = vld1q_dup_s32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c +@@ -15,5 +15,5 @@ + out_int64x2_t = vld1q_dup_s64 (0); + } + +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler 
"vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c +@@ -15,5 +15,5 @@ + out_int8x16_t = vld1q_dup_s8 (0); + } + +-/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c +@@ -15,5 +15,5 @@ + out_uint16x8_t = vld1q_dup_u16 (0); } -/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ @@ -28817,6 +29385,15 @@ -/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c +@@ -15,5 +15,5 @@ + out_float32x4_t = vld1q_f32 (0); + } + +-/* { dg-final { scan-assembler 
"vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c @@ -16,5 +16,5 @@ @@ -28916,15 +29493,6 @@ -/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c -@@ -15,5 +15,5 @@ - out_float32x4_t = vld1q_f32 (0); - } - --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c @@ -15,5 +15,5 @@ @@ -29015,302 +29583,295 @@ -/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld1\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s16.c @@ -15,5 +15,5 @@ - out_float32x2_t = vld1_dup_f32 (0); + out_int16x4_t = vld1_s16 (0); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s32.c @@ -15,5 +15,5 @@ - out_poly16x4_t = vld1_dup_p16 (0); + out_int32x2_t = vld1_s32 (0); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.32\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s64.c @@ -15,5 +15,5 @@ - out_poly8x8_t = vld1_dup_p8 (0); + out_int64x1_t = vld1_s64 (0); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s8.c @@ -15,5 +15,5 @@ - out_int16x4_t = vld1_dup_s16 (0); + out_int8x8_t = vld1_s8 (0); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u16.c @@ -15,5 +15,5 @@ - out_int32x2_t = vld1_dup_s32 (0); + out_uint16x4_t = vld1_u16 (0); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u32.c @@ -15,5 +15,5 @@ - out_int64x1_t = vld1_dup_s64 (0); + out_uint32x2_t = vld1_u32 (0); + } + +-/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.32\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u64.c +@@ -15,5 +15,5 @@ + out_uint64x1_t = vld1_u64 (0); } -/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u8.c @@ -15,5 +15,5 @@ - out_int8x8_t = vld1_dup_s8 (0); + out_uint8x8_t = vld1_u8 (0); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c @@ -15,5 +15,5 @@ - out_uint16x4_t = 
vld1_dup_u16 (0); + out_float32x2x2_t = vld2_dup_f32 (0); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c @@ -15,5 +15,5 @@ - out_uint32x2_t = vld1_dup_u32 (0); + out_poly16x4x2_t = vld2_dup_p16 (0); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c @@ -15,5 +15,5 @@ - out_uint64x1_t = vld1_dup_u64 (0); + out_poly8x8x2_t = vld2_dup_p8 (0); } --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c @@ -15,5 +15,5 @@ - out_uint8x8_t = vld1_dup_u8 (0); + out_int16x4x2_t = vld2_dup_s16 (0); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c -@@ -16,5 +16,5 @@ - out_float32x2_t = vld1_lane_f32 (0, arg1_float32x2_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c +@@ -15,5 +15,5 @@ + out_int32x2x2_t = vld2_dup_s32 (0); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c -@@ -16,5 +16,5 @@ - out_poly16x4_t = vld1_lane_p16 (0, arg1_poly16x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c ++++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c +@@ -15,5 +15,5 @@ + out_int64x1x2_t = vld2_dup_s64 (0); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c -@@ -16,5 +16,5 @@ - out_poly8x8_t = vld1_lane_p8 (0, arg1_poly8x8_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c +@@ -15,5 +15,5 @@ + out_int8x8x2_t = vld2_dup_s8 (0); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c -@@ -16,5 +16,5 @@ - out_int16x4_t = vld1_lane_s16 (0, arg1_int16x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c +@@ -15,5 +15,5 @@ + out_uint16x4x2_t = vld2_dup_u16 (0); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c -@@ -16,5 +16,5 @@ - out_int32x2_t = vld1_lane_s32 (0, arg1_int32x2_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c +@@ -15,5 +15,5 @@ + out_uint32x2x2_t = vld2_dup_u32 (0); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c -@@ -16,5 +16,5 @@ - out_int64x1_t = vld1_lane_s64 (0, arg1_int64x1_t, 0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c +@@ -15,5 +15,5 @@ + out_uint64x1x2_t = vld2_dup_u64 (0); } --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c 
-@@ -16,5 +16,5 @@ - out_int8x8_t = vld1_lane_s8 (0, arg1_int8x8_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c +@@ -15,5 +15,5 @@ + out_uint8x8x2_t = vld2_dup_u8 (0); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c -@@ -16,5 +16,5 @@ - out_uint16x4_t = vld1_lane_u16 (0, arg1_uint16x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2f32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2f32.c +@@ -15,5 +15,5 @@ + out_float32x2x2_t = vld2_f32 (0); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c @@ -16,5 +16,5 @@ - out_uint32x2_t = vld1_lane_u32 (0, arg1_uint32x2_t, 1); + out_float32x2x2_t = vld2_lane_f32 (0, arg1_float32x2x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c @@ -16,5 +16,5 @@ - out_uint64x1_t = vld1_lane_u64 (0, arg1_uint64x1_t, 0); + out_poly16x4x2_t = vld2_lane_p16 (0, 
arg1_poly16x4x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c @@ -16,5 +16,5 @@ - out_uint8x8_t = vld1_lane_u8 (0, arg1_uint8x8_t, 1); + out_poly8x8x2_t = vld2_lane_p8 (0, arg1_poly8x8x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1f32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1f32.c -@@ -15,5 +15,5 @@ - out_float32x2_t = vld1_f32 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c +@@ -16,5 +16,5 @@ + out_int16x4x2_t = vld2_lane_s16 (0, arg1_int16x4x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1p16.c -@@ -15,5 +15,5 @@ - out_poly16x4_t = vld1_p16 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c +@@ -16,5 +16,5 @@ + out_int32x2x2_t = vld2_lane_s32 (0, arg1_int32x2x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1p8.c -@@ -15,5 +15,5 @@ - out_poly8x8_t = vld1_p8 (0); - } - --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s16.c -@@ -15,5 +15,5 @@ - out_int16x4_t = vld1_s16 (0); - } - --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s32.c -@@ -15,5 +15,5 @@ - out_int32x2_t = vld1_s32 (0); +--- 
a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c +@@ -16,5 +16,5 @@ + out_int8x8x2_t = vld2_lane_s8 (0, arg1_int8x8x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s64.c -@@ -15,5 +15,5 @@ - out_int64x1_t = vld1_s64 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c +@@ -16,5 +16,5 @@ + out_uint16x4x2_t = vld2_lane_u16 (0, arg1_uint16x4x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s8.c -@@ -15,5 +15,5 @@ - out_int8x8_t = vld1_s8 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c +@@ -16,5 +16,5 @@ + out_uint32x2x2_t = vld2_lane_u32 (0, arg1_uint32x2x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u16.c -@@ -15,5 +15,5 @@ - out_uint16x4_t = vld1_u16 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c +@@ -16,5 +16,5 @@ + out_uint8x8x2_t = vld2_lane_u8 (0, arg1_uint8x8x2_t, 1); } --/* { dg-final { scan-assembler "vld1\.16\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2p16.c @@ -15,5 +15,5 @@ - out_uint32x2_t = vld1_u32 (0); + out_poly16x4x2_t = vld2_p16 (0); } --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u64.c -+++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vld1u64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2p8.c @@ -15,5 +15,5 @@ - out_uint64x1_t = vld1_u64 (0); + out_poly8x8x2_t = vld2_p8 (0); } --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u8.c -@@ -15,5 +15,5 @@ - out_uint8x8_t = vld1_u8 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c +@@ -15,6 +15,6 @@ + out_float32x4x2_t = vld2q_f32 (0); } --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c @@ -29366,17 +29927,6 @@ -/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c -@@ -15,6 +15,6 @@ - out_float32x4x2_t = vld2q_f32 (0); - } - --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c @@ -15,6 +15,6 @@ @@ -29465,213 +30015,6 @@ +/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c -@@ -15,5 +15,5 @@ - out_float32x2x2_t = vld2_dup_f32 (0); - } - --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c -@@ -15,5 +15,5 @@ - out_poly16x4x2_t = vld2_dup_p16 (0); - } - --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], 
\[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c -@@ -15,5 +15,5 @@ - out_poly8x8x2_t = vld2_dup_p8 (0); - } - --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c -@@ -15,5 +15,5 @@ - out_int16x4x2_t = vld2_dup_s16 (0); - } - --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c -@@ -15,5 +15,5 @@ - out_int32x2x2_t = vld2_dup_s32 (0); - } - --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c -@@ -15,5 +15,5 @@ - out_int64x1x2_t = vld2_dup_s64 (0); - } - --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c -@@ -15,5 +15,5 @@ - out_int8x8x2_t = vld2_dup_s8 (0); - } - --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c -@@ -15,5 +15,5 @@ - out_uint16x4x2_t = vld2_dup_u16 (0); - } - --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c -@@ -15,5 +15,5 @@ - out_uint32x2x2_t = vld2_dup_u32 (0); - } - --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c -@@ -15,5 +15,5 @@ - out_uint64x1x2_t = vld2_dup_u64 (0); - } - --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c -@@ -15,5 +15,5 @@ - out_uint8x8x2_t = vld2_dup_u8 (0); - } - --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c -@@ -16,5 +16,5 @@ - out_float32x2x2_t = vld2_lane_f32 (0, arg1_float32x2x2_t, 1); - } - --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c -@@ -16,5 +16,5 @@ - out_poly16x4x2_t = vld2_lane_p16 (0, arg1_poly16x4x2_t, 1); - } - --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c -@@ -16,5 +16,5 @@ - out_poly8x8x2_t = vld2_lane_p8 (0, arg1_poly8x8x2_t, 1); - } - --/* { dg-final { 
scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c -@@ -16,5 +16,5 @@ - out_int16x4x2_t = vld2_lane_s16 (0, arg1_int16x4x2_t, 1); - } - --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c -@@ -16,5 +16,5 @@ - out_int32x2x2_t = vld2_lane_s32 (0, arg1_int32x2x2_t, 1); - } - --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c -@@ -16,5 +16,5 @@ - out_int8x8x2_t = vld2_lane_s8 (0, arg1_int8x8x2_t, 1); - } - --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c -@@ -16,5 +16,5 @@ - out_uint16x4x2_t = vld2_lane_u16 (0, arg1_uint16x4x2_t, 1); - } - --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c -@@ -16,5 +16,5 @@ - out_uint32x2x2_t = vld2_lane_u32 (0, arg1_uint32x2x2_t, 1); - } - --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c -@@ -16,5 +16,5 @@ - out_uint8x8x2_t = vld2_lane_u8 (0, arg1_uint8x8x2_t, 1); - } - --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2f32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2f32.c -@@ -15,5 +15,5 @@ - out_float32x2x2_t = vld2_f32 (0); - } - --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2p16.c -@@ -15,5 +15,5 @@ - out_poly16x4x2_t = vld2_p16 (0); - } - --/* { dg-final { scan-assembler "vld2\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2p8.c -@@ -15,5 +15,5 @@ - out_poly8x8x2_t = vld2_p8 (0); - } - --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2s16.c @@ -15,5 +15,5 @@ @@ -29744,159 +30087,6 @@ -/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c -@@ -16,5 +16,5 @@ - out_float32x4x3_t = vld3q_lane_f32 (0, arg1_float32x4x3_t, 1); - } - --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c -@@ -16,5 +16,5 @@ - out_poly16x8x3_t = vld3q_lane_p16 (0, arg1_poly16x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c -@@ -16,5 +16,5 @@ - out_int16x8x3_t = vld3q_lane_s16 (0, arg1_int16x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c -@@ -16,5 +16,5 @@ - out_int32x4x3_t = vld3q_lane_s32 (0, arg1_int32x4x3_t, 1); - } - --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c -@@ -16,5 +16,5 @@ - out_uint16x8x3_t = vld3q_lane_u16 (0, arg1_uint16x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c -@@ -16,5 +16,5 @@ - out_uint32x4x3_t = vld3q_lane_u32 (0, arg1_uint32x4x3_t, 1); - } - --/* { dg-final { scan-assembler "vld3\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c -@@ -15,6 +15,6 @@ - out_float32x4x3_t = vld3q_f32 (0); - } - --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c -@@ -15,6 +15,6 @@ - out_poly16x8x3_t = vld3q_p16 (0); - } - --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c -@@ -15,6 +15,6 @@ - out_poly8x16x3_t = vld3q_p8 (0); - } - --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c -@@ -15,6 +15,6 @@ - out_int16x8x3_t = vld3q_s16 (0); - } - --/* { dg-final { scan-assembler "vld3\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c -@@ -15,6 +15,6 @@ - out_int32x4x3_t = vld3q_s32 (0); - } - --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c 
-@@ -15,6 +15,6 @@ - out_int8x16x3_t = vld3q_s8 (0); - } - --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c -@@ -15,6 +15,6 @@ - out_uint16x8x3_t = vld3q_u16 (0); - } - --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- 
a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c -@@ -15,6 +15,6 @@ - out_uint32x4x3_t = vld3q_u32 (0); - } - --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c -@@ -15,6 +15,6 @@ - out_uint8x16x3_t = vld3q_u8 (0); - } - --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c @@ -15,5 +15,5 @@ @@ -29996,6 +30186,15 @@ -/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3f32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3f32.c +@@ -15,5 +15,5 @@ + out_float32x2x3_t = vld3_f32 (0); + } + +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c @@ -16,5 +16,5 @@ @@ -30077,15 +30276,6 @@ -/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3f32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3f32.c -@@ -15,5 +15,5 @@ - out_float32x2x3_t = vld3_f32 (0); - } - --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld3p16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3p16.c @@ -15,5 +15,5 @@ @@ -30104,230 +30294,230 @@ -/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s16.c -@@ -15,5 +15,5 @@ - out_int16x4x3_t = vld3_s16 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c +@@ -15,6 +15,6 @@ + out_float32x4x3_t = vld3q_f32 (0); } --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s32.c -@@ -15,5 +15,5 @@ - out_int32x2x3_t = vld3_s32 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c +@@ -16,5 +16,5 @@ + out_float32x4x3_t = vld3q_lane_f32 (0, arg1_float32x4x3_t, 1); } --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { 
scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s64.c -@@ -15,5 +15,5 @@ - out_int64x1x3_t = vld3_s64 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c +@@ -16,5 +16,5 @@ + out_poly16x8x3_t = vld3q_lane_p16 (0, arg1_poly16x8x3_t, 1); } --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { 
cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s8.c -@@ -15,5 +15,5 @@ - out_int8x8x3_t = vld3_s8 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c +@@ -16,5 +16,5 @@ + out_int16x8x3_t = vld3q_lane_s16 (0, arg1_int16x8x3_t, 1); } --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u16.c -@@ -15,5 +15,5 @@ - out_uint16x4x3_t = vld3_u16 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c +@@ -16,5 +16,5 @@ + out_int32x4x3_t = vld3q_lane_s32 (0, arg1_int32x4x3_t, 1); } --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ 
-+/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u32.c -@@ -15,5 +15,5 @@ - out_uint32x2x3_t = vld3_u32 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c +@@ -16,5 +16,5 @@ + out_uint16x8x3_t = vld3q_lane_u16 (0, arg1_uint16x8x3_t, 1); } --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u64.c -@@ -15,5 +15,5 @@ - out_uint64x1x3_t = vld3_u64 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c +@@ -16,5 +16,5 @@ + out_uint32x4x3_t = vld3q_lane_u32 (0, arg1_uint32x4x3_t, 1); } --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u8.c -@@ -15,5 +15,5 @@ - out_uint8x8x3_t = vld3_u8 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c +@@ -15,6 +15,6 @@ + 
out_poly16x8x3_t = vld3q_p16 (0); } --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c -@@ -16,5 +16,5 @@ - out_float32x4x4_t = vld4q_lane_f32 (0, arg1_float32x4x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c +@@ -15,6 +15,6 @@ + out_poly8x16x3_t = vld3q_p8 (0); } --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c -@@ -16,5 +16,5 @@ - out_poly16x8x4_t = vld4q_lane_p16 (0, arg1_poly16x8x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c +@@ -15,6 +15,6 @@ + out_int16x8x3_t = vld3q_s16 (0); } --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c -@@ -16,5 +16,5 @@ - out_int16x8x4_t = vld4q_lane_s16 (0, arg1_int16x8x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c +@@ -15,6 +15,6 @@ + out_int32x4x3_t = vld3q_s32 (0); } --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c -@@ -16,5 +16,5 @@ - out_int32x4x4_t = vld4q_lane_s32 (0, arg1_int32x4x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c +@@ -15,6 +15,6 @@ + out_int8x16x3_t = vld3q_s8 (0); } --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c -@@ -16,5 +16,5 @@ - out_uint16x8x4_t = vld4q_lane_u16 (0, arg1_uint16x8x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c +@@ -15,6 +15,6 @@ + out_uint16x8x3_t = vld3q_u16 (0); } --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler 
"vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c -@@ -16,5 +16,5 @@ - out_uint32x4x4_t = vld4q_lane_u32 (0, arg1_uint32x4x4_t, 1); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c +@@ -15,6 +15,6 @@ + out_uint32x4x3_t = vld3q_u32 (0); } --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c @@ -15,6 +15,6 @@ - out_float32x4x4_t = vld4q_f32 (0); + out_uint8x16x3_t = vld3q_u8 (0); } --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { 
scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c -@@ -15,6 +15,6 @@ - out_poly16x8x4_t = vld4q_p16 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s16.c +@@ -15,5 +15,5 @@ + out_int16x4x3_t = vld3_s16 (0); } --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c -@@ -15,6 +15,6 @@ - out_poly8x16x4_t = vld4q_p8 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s32.c +@@ -15,5 +15,5 @@ + out_int32x2x3_t = vld3_s32 (0); } --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { 
dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c -@@ -15,6 +15,6 @@ - out_int16x8x4_t = vld4q_s16 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s64.c +@@ -15,5 +15,5 @@ + out_int64x1x3_t = vld3_s64 (0); } --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c 
-+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c -@@ -15,6 +15,6 @@ - out_int32x4x4_t = vld4q_s32 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s8.c +@@ -15,5 +15,5 @@ + out_int8x8x3_t = vld3_s8 (0); } --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c -@@ -15,6 +15,6 @@ - out_int8x16x4_t = vld4q_s8 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u16.c +@@ -15,5 +15,5 @@ + out_uint16x4x3_t = vld3_u16 (0); } --/* { dg-final { scan-assembler 
"vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c -@@ -15,6 +15,6 @@ - out_uint16x8x4_t = vld4q_u16 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u32.c +@@ -15,5 +15,5 @@ + out_uint32x2x3_t = vld3_u32 (0); } --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c -@@ -15,6 +15,6 @@ - out_uint32x4x4_t = vld4q_u32 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u64.c +@@ -15,5 +15,5 @@ + out_uint64x1x3_t = vld3_u64 (0); } --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c -@@ -15,6 +15,6 @@ - out_uint8x16x4_t = vld4q_u8 (0); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u8.c +@@ -15,5 +15,5 @@ + out_uint8x8x3_t = vld3_u8 (0); } --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c @@ -30428,6 +30618,15 @@ -/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4f32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4f32.c +@@ -15,5 +15,5 @@ + out_float32x2x4_t = vld4_f32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c @@ -16,5 +16,5 @@ @@ -30509,15 +30708,6 @@ -/* { dg-final { scan-assembler 
"vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4f32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4f32.c -@@ -15,5 +15,5 @@ - out_float32x2x4_t = vld4_f32 (0); - } - --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld4p16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4p16.c @@ -15,5 +15,5 @@ @@ -30536,6 +30726,159 @@ -/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ +--- 
a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c +@@ -15,6 +15,6 @@ + out_float32x4x4_t = vld4q_f32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c +@@ -16,5 +16,5 @@ + out_float32x4x4_t = vld4q_lane_f32 (0, arg1_float32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { 
cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c +@@ -16,5 +16,5 @@ + out_poly16x8x4_t = vld4q_lane_p16 (0, arg1_poly16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c +@@ -16,5 +16,5 @@ + out_int16x8x4_t = vld4q_lane_s16 (0, arg1_int16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c +@@ -16,5 +16,5 @@ + out_int32x4x4_t = vld4q_lane_s32 (0, 
arg1_int32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c +@@ -16,5 +16,5 @@ + out_uint16x8x4_t = vld4q_lane_u16 (0, arg1_uint16x8x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c +@@ -16,5 +16,5 @@ + out_uint32x4x4_t = vld4q_lane_u32 (0, arg1_uint32x4x4_t, 1); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c +@@ -15,6 +15,6 @@ + out_poly16x8x4_t = vld4q_p16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c +@@ -15,6 +15,6 @@ + out_poly8x16x4_t = vld4q_p8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c +@@ -15,6 +15,6 @@ + out_int16x8x4_t = vld4q_s16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c ++++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c +@@ -15,6 +15,6 @@ + out_int32x4x4_t = vld4q_s32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c +@@ -15,6 +15,6 @@ + out_int8x16x4_t = vld4q_s8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c +@@ -15,6 +15,6 @@ + out_uint16x8x4_t = vld4q_u16 (0); + } + +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c +@@ -15,6 +15,6 @@ + out_uint32x4x4_t = vld4q_u32 (0); + } + +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c +@@ -15,6 +15,6 @@ + out_uint8x16x4_t = vld4q_u8 (0); + } + +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4s16.c @@ -15,5 +15,5 @@ @@ -30608,329 +30951,329 @@ -/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vld4\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1f32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1f32.c @@ -16,5 +16,5 @@ - vst1q_lane_f32 (arg0_float32_t, arg1_float32x4_t, 1); + vst1_f32 (arg0_float32_t, arg1_float32x2_t); + } + +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c +@@ -16,5 +16,5 @@ + vst1_lane_f32 (arg0_float32_t, arg1_float32x2_t, 1); } -/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c @@ -16,5 +16,5 @@ - vst1q_lane_p16 (arg0_poly16_t, arg1_poly16x8_t, 1); + vst1_lane_p16 (arg0_poly16_t, arg1_poly16x4_t, 1); } -/* { dg-final { scan-assembler "vst1\.16\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c @@ -16,5 +16,5 @@ - vst1q_lane_p8 (arg0_poly8_t, arg1_poly8x16_t, 1); + vst1_lane_p8 (arg0_poly8_t, arg1_poly8x8_t, 1); } -/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c @@ -16,5 +16,5 @@ - vst1q_lane_s16 (arg0_int16_t, arg1_int16x8_t, 1); + vst1_lane_s16 (arg0_int16_t, arg1_int16x4_t, 1); } -/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- 
a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c @@ -16,5 +16,5 @@ - vst1q_lane_s32 (arg0_int32_t, arg1_int32x4_t, 1); + vst1_lane_s32 (arg0_int32_t, arg1_int32x2_t, 1); } -/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c @@ -16,5 +16,5 @@ - vst1q_lane_s64 (arg0_int64_t, arg1_int64x2_t, 1); + vst1_lane_s64 (arg0_int64_t, arg1_int64x1_t, 0); } -/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c @@ -16,5 +16,5 @@ - vst1q_lane_s8 (arg0_int8_t, arg1_int8x16_t, 1); + vst1_lane_s8 (arg0_int8_t, arg1_int8x8_t, 1); } -/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c @@ -16,5 +16,5 @@ - vst1q_lane_u16 (arg0_uint16_t, arg1_uint16x8_t, 1); + vst1_lane_u16 (arg0_uint16_t, arg1_uint16x4_t, 1); } -/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c @@ -16,5 +16,5 @@ - vst1q_lane_u32 (arg0_uint32_t, arg1_uint32x4_t, 1); + vst1_lane_u32 (arg0_uint32_t, arg1_uint32x2_t, 1); } -/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c -+++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c @@ -16,5 +16,5 @@ - vst1q_lane_u64 (arg0_uint64_t, arg1_uint64x2_t, 1); + vst1_lane_u64 (arg0_uint64_t, arg1_uint64x1_t, 0); } -/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c @@ -16,5 +16,5 @@ - vst1q_lane_u8 (arg0_uint8_t, arg1_uint8x16_t, 1); + vst1_lane_u8 (arg0_uint8_t, arg1_uint8x8_t, 1); } -/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1p16.c @@ -16,5 +16,5 @@ - vst1q_f32 (arg0_float32_t, arg1_float32x4_t); + vst1_p16 (arg0_poly16_t, arg1_poly16x4_t); } --/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { 
scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1p8.c @@ -16,5 +16,5 @@ - vst1q_p16 (arg0_poly16_t, arg1_poly16x8_t); + vst1_p8 (arg0_poly8_t, arg1_poly8x8_t); } --/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c @@ -16,5 +16,5 @@ - vst1q_p8 (arg0_poly8_t, arg1_poly8x16_t); + vst1q_f32 (arg0_float32_t, arg1_float32x4_t); } --/* { dg-final { scan-assembler 
"vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c @@ -16,5 +16,5 @@ - vst1q_s16 (arg0_int16_t, arg1_int16x8_t); + vst1q_lane_f32 (arg0_float32_t, arg1_float32x4_t, 1); } --/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- 
a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c @@ -16,5 +16,5 @@ - vst1q_s32 (arg0_int32_t, arg1_int32x4_t); + vst1q_lane_p16 (arg0_poly16_t, arg1_poly16x8_t, 1); } --/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c @@ -16,5 +16,5 @@ - vst1q_s64 (arg0_int64_t, arg1_int64x2_t); + vst1q_lane_p8 (arg0_poly8_t, arg1_poly8x16_t, 1); } --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.8\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c @@ -16,5 +16,5 @@ - vst1q_s8 (arg0_int8_t, arg1_int8x16_t); + vst1q_lane_s16 (arg0_int16_t, arg1_int16x8_t, 1); } --/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c @@ -16,5 +16,5 @@ - vst1q_u16 (arg0_uint16_t, arg1_uint16x8_t); + vst1q_lane_s32 (arg0_int32_t, arg1_int32x4_t, 1); } --/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c @@ -16,5 +16,5 @@ - vst1q_u32 (arg0_uint32_t, arg1_uint32x4_t); + vst1q_lane_s64 (arg0_int64_t, arg1_int64x2_t, 1); } --/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c ++++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c @@ -16,5 +16,5 @@ - vst1q_u64 (arg0_uint64_t, arg1_uint64x2_t); + vst1q_lane_s8 (arg0_int8_t, arg1_int8x16_t, 1); } --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c @@ -16,5 +16,5 @@ - vst1q_u8 (arg0_uint8_t, arg1_uint8x16_t); + vst1q_lane_u16 (arg0_uint16_t, arg1_uint16x8_t, 1); } --/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c @@ -16,5 +16,5 @@ - vst1_lane_f32 (arg0_float32_t, arg1_float32x2_t, 1); + vst1q_lane_u32 (arg0_uint32_t, arg1_uint32x4_t, 1); } -/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c @@ -16,5 +16,5 @@ - vst1_lane_p16 (arg0_poly16_t, arg1_poly16x4_t, 1); + vst1q_lane_u64 (arg0_uint64_t, arg1_uint64x2_t, 1); } --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c @@ -16,5 +16,5 @@ - vst1_lane_p8 (arg0_poly8_t, arg1_poly8x8_t, 1); + vst1q_lane_u8 (arg0_uint8_t, arg1_uint8x16_t, 1); } -/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c -@@ -16,5 +16,5 @@ - vst1_lane_s16 (arg0_int16_t, arg1_int16x4_t, 1); - } - --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c @@ -16,5 +16,5 @@ - vst1_lane_s32 (arg0_int32_t, arg1_int32x2_t, 1); + vst1q_p16 (arg0_poly16_t, arg1_poly16x8_t); } --/* { dg-final { scan-assembler "vst1\.32\[ 
\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c @@ -16,5 +16,5 @@ - vst1_lane_s64 (arg0_int64_t, arg1_int64x1_t, 0); + vst1q_p8 (arg0_poly8_t, arg1_poly8x16_t); } --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c +--- 
a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c @@ -16,5 +16,5 @@ - vst1_lane_s8 (arg0_int8_t, arg1_int8x8_t, 1); + vst1q_s16 (arg0_int16_t, arg1_int16x8_t); } --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c @@ -16,5 +16,5 @@ - vst1_lane_u16 (arg0_uint16_t, arg1_uint16x4_t, 1); + vst1q_s32 (arg0_int32_t, arg1_int32x4_t); } --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler 
"vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c @@ -16,5 +16,5 @@ - vst1_lane_u32 (arg0_uint32_t, arg1_uint32x2_t, 1); + vst1q_s64 (arg0_int64_t, arg1_int64x2_t); } --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c @@ -16,5 +16,5 @@ - vst1_lane_u64 (arg0_uint64_t, arg1_uint64x1_t, 0); + vst1q_s8 (arg0_int8_t, arg1_int8x16_t); } --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c @@ -16,5 +16,5 @@ - vst1_lane_u8 (arg0_uint8_t, arg1_uint8x8_t, 1); + vst1q_u16 (arg0_uint16_t, arg1_uint16x8_t); } --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1f32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1f32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c @@ -16,5 +16,5 @@ - vst1_f32 (arg0_float32_t, arg1_float32x2_t); + vst1q_u32 (arg0_uint32_t, arg1_uint32x4_t); } 
--/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1p16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c @@ -16,5 +16,5 @@ - vst1_p16 (arg0_poly16_t, arg1_poly16x4_t); + vst1q_u64 (arg0_uint64_t, arg1_uint64x2_t); } --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1p8.c +--- 
a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c @@ -16,5 +16,5 @@ - vst1_p8 (arg0_poly8_t, arg1_poly8x8_t); + vst1q_u8 (arg0_uint8_t, arg1_uint8x16_t); } --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1s16.c @@ -31004,159 +31347,15 @@ -/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c -@@ -16,5 +16,5 @@ - vst2q_lane_f32 (arg0_float32_t, arg1_float32x4x2_t, 1); - } - --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c -@@ -16,5 +16,5 @@ - vst2q_lane_p16 (arg0_poly16_t, arg1_poly16x8x2_t, 1); - } - --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c -@@ -16,5 +16,5 @@ - vst2q_lane_s16 (arg0_int16_t, arg1_int16x8x2_t, 1); - } - --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c -@@ -16,5 +16,5 @@ - vst2q_lane_s32 (arg0_int32_t, arg1_int32x4x2_t, 1); - } - --/* { 
dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c -@@ -16,5 +16,5 @@ - vst2q_lane_u16 (arg0_uint16_t, arg1_uint16x8x2_t, 1); - } - --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2f32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2f32.c @@ -16,5 +16,5 @@ - vst2q_lane_u32 (arg0_uint32_t, arg1_uint32x4x2_t, 1); - } - --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c -@@ -16,6 +16,6 @@ - vst2q_f32 (arg0_float32_t, arg1_float32x4x2_t); - } - --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c -@@ -16,6 +16,6 @@ - vst2q_p16 (arg0_poly16_t, arg1_poly16x8x2_t); - } - --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c -@@ -16,6 +16,6 @@ - vst2q_p8 (arg0_poly8_t, arg1_poly8x16x2_t); - } - --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c -@@ -16,6 +16,6 @@ - vst2q_s16 (arg0_int16_t, arg1_int16x8x2_t); - } - --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c -@@ -16,6 +16,6 @@ - vst2q_s32 (arg0_int32_t, arg1_int32x4x2_t); - } - --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c -@@ -16,6 +16,6 @@ - vst2q_s8 (arg0_int8_t, arg1_int8x16x2_t); - } - --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c -@@ -16,6 +16,6 @@ - vst2q_u16 (arg0_uint16_t, arg1_uint16x8x2_t); - } - --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c -@@ -16,6 +16,6 @@ - vst2q_u32 (arg0_uint32_t, arg1_uint32x4x2_t); + vst2_f32 (arg0_float32_t, arg1_float32x2x2_t); } -/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c -@@ -16,6 +16,6 @@ - vst2q_u8 (arg0_uint8_t, arg1_uint8x16x2_t); - } - --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c @@ -16,5 +16,5 @@ @@ -31238,15 +31437,6 @@ -/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } 
} */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2f32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2f32.c -@@ -16,5 +16,5 @@ - vst2_f32 (arg0_float32_t, arg1_float32x2x2_t); - } - --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst2p16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2p16.c @@ -16,5 +16,5 @@ @@ -31265,12120 +31455,10806 @@ -/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s16.c -@@ -16,5 +16,5 @@ - vst2_s16 (arg0_int16_t, arg1_int16x4x2_t); - } - --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s32.c -@@ -16,5 +16,5 @@ - vst2_s32 
(arg0_int32_t, arg1_int32x2x2_t); +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c +@@ -16,6 +16,6 @@ + vst2q_f32 (arg0_float32_t, arg1_float32x4x2_t); } -/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s64.c -@@ -16,5 +16,5 @@ - vst2_s64 (arg0_int64_t, arg1_int64x1x2_t); - } - --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s8.c -@@ -16,5 +16,5 @@ - vst2_s8 (arg0_int8_t, arg1_int8x8x2_t); - } - --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } 
*/ -+/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c @@ -16,5 +16,5 @@ - vst2_u16 (arg0_uint16_t, arg1_uint16x4x2_t); + vst2q_lane_f32 (arg0_float32_t, arg1_float32x4x2_t, 1); } --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c @@ -16,5 +16,5 @@ - vst2_u32 (arg0_uint32_t, arg1_uint32x2x2_t); + vst2q_lane_p16 (arg0_poly16_t, arg1_poly16x8x2_t, 1); } --/* { dg-final { scan-assembler "vst2\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u64.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c @@ -16,5 +16,5 @@ - vst2_u64 (arg0_uint64_t, arg1_uint64x1x2_t); + vst2q_lane_s16 (arg0_int16_t, arg1_int16x8x2_t, 1); } --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u8.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c @@ -16,5 +16,5 @@ - vst2_u8 (arg0_uint8_t, arg1_uint8x8x2_t); + vst2q_lane_s32 (arg0_int32_t, arg1_int32x4x2_t, 1); } --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c @@ -16,5 +16,5 @@ - vst3q_lane_f32 (arg0_float32_t, arg1_float32x4x3_t, 1); + vst2q_lane_u16 (arg0_uint16_t, arg1_uint16x8x2_t, 1); } --/* { dg-final { scan-assembler "vst3\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c @@ -16,5 +16,5 @@ - vst3q_lane_p16 (arg0_poly16_t, arg1_poly16x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { 
dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c -@@ -16,5 +16,5 @@ - vst3q_lane_s16 (arg0_int16_t, arg1_int16x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c -@@ -16,5 +16,5 @@ - vst3q_lane_s32 (arg0_int32_t, arg1_int32x4x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c -@@ -16,5 +16,5 @@ - vst3q_lane_u16 (arg0_uint16_t, arg1_uint16x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c -@@ -16,5 +16,5 @@ - vst3q_lane_u32 (arg0_uint32_t, arg1_uint32x4x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c -@@ -16,6 +16,6 @@ - vst3q_f32 (arg0_float32_t, arg1_float32x4x3_t); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* 
{ dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c -@@ -16,6 +16,6 @@ - vst3q_p16 (arg0_poly16_t, arg1_poly16x8x3_t); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c -@@ -16,6 +16,6 @@ - vst3q_p8 (arg0_poly8_t, arg1_poly8x16x3_t); - } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.8\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c -@@ -16,6 +16,6 @@ - vst3q_s16 (arg0_int16_t, arg1_int16x8x3_t); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c -@@ -16,6 +16,6 @@ - vst3q_s32 (arg0_int32_t, arg1_int32x4x3_t); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c -@@ -16,6 +16,6 @@ - vst3q_s8 (arg0_int8_t, arg1_int8x16x3_t); - } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c -@@ -16,6 +16,6 @@ - vst3q_u16 (arg0_uint16_t, arg1_uint16x8x3_t); - 
} - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c -@@ -16,6 +16,6 @@ - vst3q_u32 (arg0_uint32_t, arg1_uint32x4x3_t); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- 
a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c -@@ -16,6 +16,6 @@ - vst3q_u8 (arg0_uint8_t, arg1_uint8x16x3_t); - } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c -@@ -16,5 +16,5 @@ - vst3_lane_f32 (arg0_float32_t, arg1_float32x2x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c 
-@@ -16,5 +16,5 @@ - vst3_lane_p16 (arg0_poly16_t, arg1_poly16x4x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c -@@ -16,5 +16,5 @@ - vst3_lane_p8 (arg0_poly8_t, arg1_poly8x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c -@@ -16,5 +16,5 @@ - vst3_lane_s16 (arg0_int16_t, arg1_int16x4x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ 
-+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c -@@ -16,5 +16,5 @@ - vst3_lane_s32 (arg0_int32_t, arg1_int32x2x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c -@@ -16,5 +16,5 @@ - vst3_lane_s8 (arg0_int8_t, arg1_int8x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- 
a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c -@@ -16,5 +16,5 @@ - vst3_lane_u16 (arg0_uint16_t, arg1_uint16x4x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c -@@ -16,5 +16,5 @@ - vst3_lane_u32 (arg0_uint32_t, arg1_uint32x2x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c -@@ -16,5 +16,5 @@ - vst3_lane_u8 (arg0_uint8_t, arg1_uint8x8x3_t, 1); - } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3f32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3f32.c -@@ -16,5 +16,5 @@ - vst3_f32 (arg0_float32_t, arg1_float32x2x3_t); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3p16.c -@@ -16,5 +16,5 @@ - vst3_p16 (arg0_poly16_t, arg1_poly16x4x3_t); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3p8.c -@@ -16,5 +16,5 @@ - vst3_p8 (arg0_poly8_t, arg1_poly8x8x3_t); 
- } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s16.c -@@ -16,5 +16,5 @@ - vst3_s16 (arg0_int16_t, arg1_int16x4x3_t); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s32.c -@@ -16,5 +16,5 @@ - vst3_s32 (arg0_int32_t, arg1_int32x2x3_t); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s64.c -@@ -16,5 +16,5 @@ - vst3_s64 
(arg0_int64_t, arg1_int64x1x3_t); - } - --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s8.c -@@ -16,5 +16,5 @@ - vst3_s8 (arg0_int8_t, arg1_int8x8x3_t); - } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u16.c -@@ -16,5 +16,5 @@ - vst3_u16 (arg0_uint16_t, arg1_uint16x4x3_t); - } - --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u32.c -@@ -16,5 
+16,5 @@ - vst3_u32 (arg0_uint32_t, arg1_uint32x2x3_t); - } - --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u64.c -@@ -16,5 +16,5 @@ - vst3_u64 (arg0_uint64_t, arg1_uint64x1x3_t); - } - --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u8.c -@@ -16,5 +16,5 @@ - vst3_u8 (arg0_uint8_t, arg1_uint8x8x3_t); - } - --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c -+++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c -@@ -16,5 +16,5 @@ - vst4q_lane_f32 (arg0_float32_t, arg1_float32x4x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c -@@ -16,5 +16,5 @@ - vst4q_lane_p16 (arg0_poly16_t, arg1_poly16x8x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c -@@ -16,5 +16,5 @@ - vst4q_lane_s16 (arg0_int16_t, arg1_int16x8x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c -@@ -16,5 +16,5 @@ - vst4q_lane_s32 (arg0_int32_t, arg1_int32x4x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c -@@ -16,5 +16,5 @@ - vst4q_lane_u16 (arg0_uint16_t, arg1_uint16x8x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c -@@ -16,5 +16,5 @@ - vst4q_lane_u32 (arg0_uint32_t, arg1_uint32x4x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c -@@ -16,6 +16,6 @@ - vst4q_f32 (arg0_float32_t, arg1_float32x4x4_t); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c -@@ -16,6 +16,6 @@ - vst4q_p16 (arg0_poly16_t, arg1_poly16x8x4_t); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c -@@ -16,6 +16,6 @@ - vst4q_p8 (arg0_poly8_t, arg1_poly8x16x4_t); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { 
dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c -@@ -16,6 +16,6 @@ - vst4q_s16 (arg0_int16_t, arg1_int16x8x4_t); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c -@@ -16,6 +16,6 @@ - vst4q_s32 
(arg0_int32_t, arg1_int32x4x4_t); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c -@@ -16,6 +16,6 @@ - vst4q_s8 (arg0_int8_t, arg1_int8x16x4_t); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c -@@ -16,6 +16,6 @@ - vst4q_u16 (arg0_uint16_t, arg1_uint16x8x4_t); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c -@@ -16,6 +16,6 @@ - vst4q_u32 (arg0_uint32_t, arg1_uint32x4x4_t); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c -@@ -16,6 +16,6 @@ - vst4q_u8 (arg0_uint8_t, arg1_uint8x16x4_t); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c -@@ -16,5 +16,5 @@ - vst4_lane_f32 (arg0_float32_t, arg1_float32x2x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c -@@ -16,5 +16,5 @@ - vst4_lane_p16 (arg0_poly16_t, arg1_poly16x4x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c -@@ -16,5 +16,5 @@ - vst4_lane_p8 (arg0_poly8_t, arg1_poly8x8x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c -@@ -16,5 +16,5 @@ - vst4_lane_s16 (arg0_int16_t, arg1_int16x4x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c -@@ -16,5 +16,5 @@ - vst4_lane_s32 (arg0_int32_t, arg1_int32x2x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- 
a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c -@@ -16,5 +16,5 @@ - vst4_lane_s8 (arg0_int8_t, arg1_int8x8x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c -@@ -16,5 +16,5 @@ - vst4_lane_u16 (arg0_uint16_t, arg1_uint16x4x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c -@@ -16,5 +16,5 @@ - vst4_lane_u32 (arg0_uint32_t, arg1_uint32x2x4_t, 1); - } - --/* { dg-final { scan-assembler 
"vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c -@@ -16,5 +16,5 @@ - vst4_lane_u8 (arg0_uint8_t, arg1_uint8x8x4_t, 1); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4f32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4f32.c -@@ -16,5 +16,5 @@ - vst4_f32 (arg0_float32_t, arg1_float32x2x4_t); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4p16.c -@@ -16,5 +16,5 @@ - vst4_p16 (arg0_poly16_t, arg1_poly16x4x4_t); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4p8.c -@@ -16,5 +16,5 @@ - vst4_p8 (arg0_poly8_t, arg1_poly8x8x4_t); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s16.c -@@ -16,5 +16,5 @@ - vst4_s16 (arg0_int16_t, arg1_int16x4x4_t); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { 
scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s32.c -@@ -16,5 +16,5 @@ - vst4_s32 (arg0_int32_t, arg1_int32x2x4_t); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s64.c -@@ -16,5 +16,5 @@ - vst4_s64 (arg0_int64_t, arg1_int64x1x4_t); - } - --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s8.c -@@ -16,5 +16,5 @@ - vst4_s8 (arg0_int8_t, arg1_int8x8x4_t); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } 
*/ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u16.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u16.c -@@ -16,5 +16,5 @@ - vst4_u16 (arg0_uint16_t, arg1_uint16x4x4_t); - } - --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u32.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u32.c -@@ -16,5 +16,5 @@ - vst4_u32 (arg0_uint32_t, arg1_uint32x2x4_t); - } - --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u64.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u64.c -@@ -16,5 +16,5 @@ - vst4_u64 (arg0_uint64_t, arg1_uint64x1x4_t); - } - --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u8.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u8.c -@@ -16,5 +16,5 @@ - vst4_u8 (arg0_uint8_t, arg1_uint8x8x4_t); - } - --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c -@@ -0,0 +1,50 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O2 -funsafe-math-optimizations" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include -+float32x2_t f_sub_abs_to_vabd_32() -+{ -+ float32x2_t val1 = vdup_n_f32 (10); -+ float32x2_t val2 = vdup_n_f32 (30); -+ float32x2_t sres = vsub_f32(val1, val2); -+ float32x2_t res = vabs_f32 (sres); -+ -+ return res; -+} -+/* { dg-final { scan-assembler "vabd\.f32" } }*/ -+ -+#include -+int8x8_t sub_abs_to_vabd_8() -+{ -+ int8x8_t val1 = vdup_n_s8 (10); -+ int8x8_t val2 = vdup_n_s8 (30); -+ int8x8_t sres = vsub_s8(val1, val2); -+ int8x8_t res = vabs_s8 (sres); -+ -+ return res; -+} -+/* { dg-final { scan-assembler "vabd\.s8" } }*/ -+ -+int16x4_t sub_abs_to_vabd_16() -+{ -+ int16x4_t val1 = vdup_n_s16 (10); -+ int16x4_t val2 = vdup_n_s16 (30); -+ int16x4_t sres 
= vsub_s16(val1, val2); -+ int16x4_t res = vabs_s16 (sres); -+ -+ return res; -+} -+/* { dg-final { scan-assembler "vabd\.s16" } }*/ -+ -+int32x2_t sub_abs_to_vabd_32() -+{ -+ int32x2_t val1 = vdup_n_s32 (10); -+ int32x2_t val2 = vdup_n_s32 (30); -+ int32x2_t sres = vsub_s32(val1, val2); -+ int32x2_t res = vabs_s32 (sres); -+ -+ return res; -+} -+/* { dg-final { scan-assembler "vabd\.s32" } }*/ ---- a/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O1" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+ -+#define SETUP(A) x##A = vld3_u32 (ptr + A * 0x20) -+#define MODIFY(A) x##A = vld3_lane_u32 (ptr + A * 0x20 + 0x10, x##A, 1) -+#define STORE(A) vst3_u32 (ptr + A * 0x20, x##A) -+ -+#define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5) -+ -+void -+bar (uint32_t *ptr, int y) -+{ -+ uint32x2x3_t MANY (SETUP); -+ int *x = __builtin_alloca (y); -+ int z[0x1000]; -+ foo (x, z); -+ MANY (MODIFY); -+ foo (x, z); -+ MANY (STORE); -+} ---- a/src/gcc/testsuite/gcc.target/arm/neon-modes-3.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-3.c -@@ -0,0 +1,61 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include -+ -+void f1 (volatile float32x4_t *dest, volatile float32x4x4_t *src, int n) -+{ -+ float32x4x4_t a5, a6, a7, a8, a9; -+ int i; -+ -+ a5 = *src; -+ a6 = *src; -+ a7 = *src; -+ a8 = *src; -+ a9 = *src; -+ while (n--) -+ { -+ for (i = 0; i < 8; i++) -+ { -+ float32x4x4_t a0, a1, a2, a3, a4; -+ -+ a0 = *src; -+ a1 = *src; -+ a2 = *src; -+ a3 = *src; -+ a4 = *src; -+ *src = a0; -+ *dest = a0.val[0]; -+ *dest = a0.val[3]; -+ *src = a1; -+ *dest = a1.val[0]; -+ *dest = a1.val[3]; -+ *src = a2; -+ *dest = a2.val[0]; -+ *dest = a2.val[3]; -+ *src = a3; -+ *dest = a3.val[0]; -+ *dest 
= a3.val[3]; -+ *src = a4; -+ *dest = a4.val[0]; -+ *dest = a4.val[3]; -+ } -+ *src = a5; -+ *dest = a5.val[0]; -+ *dest = a5.val[3]; -+ *src = a6; -+ *dest = a6.val[0]; -+ *dest = a6.val[3]; -+ *src = a7; -+ *dest = a7.val[0]; -+ *dest = a7.val[3]; -+ *src = a8; -+ *dest = a8.val[0]; -+ *dest = a8.val[3]; -+ *src = a9; -+ *dest = a9.val[0]; -+ *dest = a9.val[3]; -+ } -+} ---- a/src/gcc/testsuite/gcc.target/arm/neon-vld3-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vld3-1.c -@@ -0,0 +1,27 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_hw } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+ -+uint32_t buffer[12]; -+ -+void __attribute__((noinline)) -+foo (uint32_t *a) -+{ -+ uint32x4x3_t x; -+ -+ x = vld3q_u32 (a); -+ x.val[0] = vaddq_u32 (x.val[0], x.val[1]); -+ vst3q_u32 (a, x); -+} -+ -+int -+main (void) -+{ -+ buffer[0] = 1; -+ buffer[1] = 2; -+ foo (buffer); -+ return buffer[0] != 3; -+} ---- a/src/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ -+/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */ -+ -+/* Verify that VSHR immediate is used. 
*/ -+void f1(int n, unsigned int x[], unsigned int y[]) { -+ int i; -+ for (i = 0; i < n; ++i) -+ y[i] = x[i] >> 3; -+} ---- a/src/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+/* { dg-add-options arm_neon } */ -+ -+void bor (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b) -+{ -+ int i; -+ for (i = 0; i < 9; i++) -+ c[i] = b[i] | (~a[i]); -+} -+void bic (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b) -+{ -+ int i; -+ for (i = 0; i < 9; i++) -+ c[i] = b[i] & (~a[i]); -+} -+ -+/* { dg-final { scan-assembler "vorn\\t" } } */ -+/* { dg-final { scan-assembler "vbic\\t" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ -+/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */ -+ -+/* Verify that VSHR immediate is used. */ -+void f1(int n, int x[], int y[]) { -+ int i; -+ for (i = 0; i < n; ++i) -+ y[i] = x[i] << 3; -+} ---- a/src/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ -+/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */ -+ -+/* Verify that VSHR immediate is used. 
*/ -+void f1(int n, int x[], int y[]) { -+ int i; -+ for (i = 0; i < n; ++i) -+ y[i] = x[i] >> 3; -+} ---- a/src/gcc/testsuite/gcc.target/arm/neon-vst3-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vst3-1.c -@@ -0,0 +1,25 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_hw } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+ -+uint32_t buffer[64]; -+ -+void __attribute__((noinline)) -+foo (uint32_t *a) -+{ -+ uint32x4x3_t x; -+ -+ x = vld3q_u32 (a); -+ a[35] = 1; -+ vst3q_lane_u32 (a + 32, x, 1); -+} -+ -+int -+main (void) -+{ -+ foo (buffer); -+ return buffer[35] != 1; -+} ---- a/src/gcc/testsuite/gcc.target/arm/no-wmla-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/no-wmla-1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+int -+foo (int a, short b, short c) -+{ -+ int bc = b * c; -+ return a + (short)bc; -+} -+ -+/* { dg-final { scan-assembler "mul" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr46329.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr46329.c -@@ -0,0 +1,9 @@ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_neon } */ -+ -+int __attribute__ ((vector_size (32))) x; -+void -+foo (void) -+{ -+ x <<= x; -+} ---- a/src/gcc/testsuite/gcc.target/arm/pr48183.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr48183.c -@@ -0,0 +1,25 @@ -+/* testsuite/gcc.target/arm/pr48183.c */ -+ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O -g" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include -+ -+void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n) -+{ -+ unsigned i; -+ int16x4x2_t input; -+ int32x4x2_t mid; -+ int32x4x2_t output; -+ -+ for (i = 0; i < n/2; i += 8) { -+ input = vld2_s16(src + i); -+ mid.val[0] = vmovl_s16(input.val[0]); -+ mid.val[1] = vmovl_s16(input.val[1]); -+ output.val[0] = vshlq_n_s32(mid.val[0], 8); -+ output.val[1] = vshlq_n_s32(mid.val[1], 8); -+ 
vst2q_s32((int32_t *)dst + i, output); -+ } -+} ---- a/src/gcc/testsuite/gcc.target/arm/pr49641.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr49641.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mthumb -O2" } */ -+/* { dg-require-effective-target arm_thumb1_ok } */ -+/* { dg-final { scan-assembler-not "stmia\[\\t \]*r3!\[^\\n]*r3" } } */ -+typedef struct { -+ void *t1, *t2, *t3; -+} z; -+extern volatile int y; -+static inline void foo(z *x) { -+ x->t1 = &x->t2; -+ x->t2 = ((void *)0); -+ x->t3 = &x->t1; -+} -+extern z v; -+void bar (void) { -+ y = 0; -+ foo(&v); -+} ---- a/src/gcc/testsuite/gcc.target/arm/pr50099.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr50099.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+long long foo (signed char * arg) -+{ -+ long long temp_1; -+ -+ temp_1 = arg[256]; -+ return temp_1; -+} ---- a/src/gcc/testsuite/gcc.target/arm/pr50318-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr50318-1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm_dsp } */ -+ -+long long test (unsigned int sec, unsigned long long nsecs) -+{ -+ return (long long)(long)sec * 1000000000L + (long long)(unsigned -+ long)nsecs; -+} -+ -+/* { dg-final { scan-assembler "umlal" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/shiftable.c -+++ b/src/gcc/testsuite/gcc.target/arm/shiftable.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm32 } */ -+ -+/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some -+ of these as a left shift, others as a multiply. Check that we match the -+ right one. 
*/ -+ -+int -+plus (int a, int b) -+{ -+ return (a * 64) + b; -+} -+ -+/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */ -+ -+int -+minus (int a, int b) -+{ -+ return a - (b * 64); -+} -+ -+/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */ -+ -+int -+ior (int a, int b) -+{ -+ return (a * 64) | b; -+} -+ -+/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */ -+ -+int -+xor (int a, int b) -+{ -+ return (a * 64) ^ b; -+} -+ -+/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */ -+ -+int -+and (int a, int b) -+{ -+ return (a * 64) & b; -+} -+ -+/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */ -+ -+int -+rsb (int a, int b) -+{ -+ return (a * 64) - b; -+} -+ -+/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */ -+ -+int -+mvn (int a, int b) -+{ -+ return ~(a * 64); -+} -+ -+/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/smlaltb-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/smlaltb-1.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+long long int -+foo (long long x, int in) -+{ -+ short a = in & 0xffff; -+ short b = (in & 0xffff0000) >> 16; -+ -+ return x + b * a; -+} -+ -+/* { dg-final { scan-assembler "smlaltb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/smlaltt-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/smlaltt-1.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+long long int -+foo (long long x, int in1, int in2) -+{ -+ short a = (in1 & 0xffff0000) >> 16; -+ short b = (in2 & 0xffff0000) >> 16; -+ -+ return x + b * a; -+} -+ -+/* { dg-final { scan-assembler "smlaltt" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/smlatb-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/smlatb-1.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+int -+foo (int x, int in) -+{ -+ short a = in & 0xffff; -+ short b = (in & 0xffff0000) >> 16; -+ -+ return x + b * a; -+} -+ -+/* { dg-final 
{ scan-assembler "smlatb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/smlatt-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/smlatt-1.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+int -+foo (int x, int in1, int in2) -+{ -+ short a = (in1 & 0xffff0000) >> 16; -+ short b = (in2 & 0xffff0000) >> 16; -+ -+ return x + b * a; -+} -+ -+/* { dg-final { scan-assembler "smlatt" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c -@@ -0,0 +1,13 @@ -+/* Use conditional compare */ -+/* { dg-options "-O2" } */ -+/* { dg-skip-if "" { arm_thumb1_ok } } */ -+/* { dg-final { scan-assembler "cmpne" } } */ -+ -+int f(int i, int j) -+{ -+ if ( (i == '+') || (j == '-') ) { -+ return 1; -+ } else { -+ return 0; -+ } -+} ---- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c -@@ -0,0 +1,13 @@ -+/* Use conditional compare */ -+/* { dg-options "-O2" } */ -+/* { dg-skip-if "" { arm_thumb1_ok } } */ -+/* { dg-final { scan-assembler "cmpeq" } } */ -+ -+int f(int i, int j) -+{ -+ if ( (i == '+') && (j == '-') ) { -+ return 1; -+ } else { -+ return 0; -+ } -+} ---- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c -+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c -@@ -0,0 +1,12 @@ -+/* Use conditional compare */ -+/* { dg-options "-O2" } */ -+/* { dg-skip-if "" { arm_thumb1_ok } } */ -+/* { dg-final { scan-assembler "cmpgt" } } */ -+ -+int f(int i, int j) -+{ -+ if ( (i >= '+') ? (j > '-') : 0) -+ return 1; -+ else -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c -+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c -@@ -0,0 +1,12 @@ -+/* Use conditional compare */ -+/* { dg-options "-O2" } */ -+/* { dg-skip-if "" { arm_thumb1_ok } } */ -+/* { dg-final { scan-assembler "cmpgt" } } */ -+ -+int f(int i, int j) -+{ -+ if ( (i >= '+') ? 
(j <= '-') : 1) -+ return 1; -+ else -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c -+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c -@@ -0,0 +1,27 @@ -+/* Ensure simple replicated constant immediates work. */ -+/* { dg-options "-mthumb -O2" } */ -+/* { dg-require-effective-target arm_thumb2_ok } */ -+ -+int -+foo1 (int a) -+{ -+ return a + 0xfefefefe; -+} -+ -+/* { dg-final { scan-assembler "add.*#-16843010" } } */ -+ -+int -+foo2 (int a) -+{ -+ return a - 0xab00ab00; -+} -+ -+/* { dg-final { scan-assembler "sub.*#-1426019584" } } */ -+ -+int -+foo3 (int a) -+{ -+ return a & 0x00cd00cd; -+} -+ -+/* { dg-final { scan-assembler "and.*#13435085" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c -+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c -@@ -0,0 +1,75 @@ -+/* Ensure split constants can use replicated patterns. */ -+/* { dg-options "-mthumb -O2" } */ -+/* { dg-require-effective-target arm_thumb2_ok } */ -+ -+int -+foo1 (int a) -+{ -+ return a + 0xfe00fe01; -+} -+ -+/* { dg-final { scan-assembler "add.*#-33489408" } } */ -+/* { dg-final { scan-assembler "add.*#1" } } */ -+ -+int -+foo2 (int a) -+{ -+ return a + 0xdd01dd00; -+} -+ -+/* { dg-final { scan-assembler "add.*#-587145984" } } */ -+/* { dg-final { scan-assembler "add.*#65536" } } */ -+ -+int -+foo3 (int a) -+{ -+ return a + 0x00443344; -+} -+ -+/* { dg-final { scan-assembler "add.*#4456516" } } */ -+/* { dg-final { scan-assembler "add.*#13056" } } */ -+ -+int -+foo4 (int a) -+{ -+ return a + 0x77330033; -+} -+ -+/* { dg-final { scan-assembler "add.*#1996488704" } } */ -+/* { dg-final { scan-assembler "add.*#3342387" } } */ -+ -+int -+foo5 (int a) -+{ -+ return a + 0x11221122; -+} -+ -+/* { dg-final { scan-assembler "add.*#285217024" } } */ -+/* { dg-final { scan-assembler "add.*#2228258" } } */ -+ -+int -+foo6 (int a) -+{ -+ return a + 0x66666677; -+} -+ -+/* { dg-final { scan-assembler 
"add.*#1717986918" } } */ -+/* { dg-final { scan-assembler "add.*#17" } } */ -+ -+int -+foo7 (int a) -+{ -+ return a + 0x99888888; -+} -+ -+/* { dg-final { scan-assembler "add.*#-2004318072" } } */ -+/* { dg-final { scan-assembler "add.*#285212672" } } */ -+ -+int -+foo8 (int a) -+{ -+ return a + 0xdddddfff; -+} -+ -+/* { dg-final { scan-assembler "add.*#-572662307" } } */ -+/* { dg-final { scan-assembler "addw.*#546" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c -+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c -@@ -0,0 +1,28 @@ -+/* Ensure negated/inverted replicated constant immediates work. */ -+/* { dg-options "-mthumb -O2" } */ -+/* { dg-require-effective-target arm_thumb2_ok } */ -+ -+int -+foo1 (int a) -+{ -+ return a | 0xffffff00; -+} -+ -+/* { dg-final { scan-assembler "orn.*#255" } } */ -+ -+int -+foo2 (int a) -+{ -+ return a & 0xffeeffee; -+} -+ -+/* { dg-final { scan-assembler "bic.*#1114129" } } */ -+ -+int -+foo3 (int a) -+{ -+ return a & 0xaaaaaa00; -+} -+ -+/* { dg-final { scan-assembler "and.*#-1431655766" } } */ -+/* { dg-final { scan-assembler "bic.*#170" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c -+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c -@@ -0,0 +1,22 @@ -+/* Ensure replicated constants don't make things worse. */ -+/* { dg-options "-mthumb -O2" } */ -+/* { dg-require-effective-target arm_thumb2_ok } */ -+ -+int -+foo1 (int a) -+{ -+ /* It might be tempting to use 0x01000100, but it wouldn't help. 
*/ -+ return a + 0x01f001e0; -+} -+ -+/* { dg-final { scan-assembler "add.*#32505856" } } */ -+/* { dg-final { scan-assembler "add.*#480" } } */ -+ -+int -+foo2 (int a) -+{ -+ return a + 0x0f100e10; -+} -+ -+/* { dg-final { scan-assembler "add.*#252706816" } } */ -+/* { dg-final { scan-assembler "add.*#3600" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_unaligned } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void unknown_alignment (char *dest, char *src) -+{ -+ memcpy (dest, src, 15); -+} -+ -+/* We should see three unaligned word loads and store pairs, one unaligned -+ ldrh/strh pair, and an ldrb/strb pair. Sanity check that. */ -+ -+/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */ -+/* { dg-final { scan-assembler-times "ldrh" 1 } } */ -+/* { dg-final { scan-assembler-times "strh" 1 } } */ -+/* { dg-final { scan-assembler-times "ldrb" 1 } } */ -+/* { dg-final { scan-assembler-times "strb" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_unaligned } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+char dest[16]; -+ -+void aligned_dest (char *src) -+{ -+ memcpy (dest, src, 15); -+} -+ -+/* Expect a multi-word store for the main part of the copy, but subword -+ loads/stores for the remainder. 
*/ -+ -+/* { dg-final { scan-assembler-times "stmia" 1 } } */ -+/* { dg-final { scan-assembler-times "ldrh" 1 } } */ -+/* { dg-final { scan-assembler-times "strh" 1 } } */ -+/* { dg-final { scan-assembler-times "ldrb" 1 } } */ -+/* { dg-final { scan-assembler-times "strb" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c -+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_unaligned } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+char src[16]; -+ -+void aligned_src (char *dest) -+{ -+ memcpy (dest, src, 15); -+} -+ -+/* Expect a multi-word load for the main part of the copy, but subword -+ loads/stores for the remainder. */ -+ -+/* { dg-final { scan-assembler-times "ldmia" 1 } } */ -+/* { dg-final { scan-assembler-times "ldrh" 1 } } */ -+/* { dg-final { scan-assembler-times "strh" 1 } } */ -+/* { dg-final { scan-assembler-times "ldrb" 1 } } */ -+/* { dg-final { scan-assembler-times "strb" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c -+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_unaligned } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+char src[16]; -+char dest[16]; -+ -+void aligned_both (void) -+{ -+ memcpy (dest, src, 15); -+} -+ -+/* We know both src and dest to be aligned: expect multiword loads/stores. 
*/ -+ -+/* { dg-final { scan-assembler-times "ldmia" 1 } } */ -+/* { dg-final { scan-assembler-times "stmia" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv6" } */ -+ -+unsigned char foo (unsigned char c) -+{ -+ return (c >= '0') && (c <= '9'); -+} -+ -+/* { dg-final { scan-assembler-not "uxtb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c -@@ -127,13 +127,13 @@ - - void test_ldst (float f[], double d[]) { - /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */ -- /* { dg-final { scan-assembler "flds.+ \\\[r0, #-1020\\\]" } } */ -+ /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ - /* { dg-final { scan-assembler "add.+ r0, #1024" } } */ -- /* { dg-final { scan-assembler "fsts.+ \\\[r0, #0\\\]\n" } } */ -+ /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\], #0\\\]\n" } } */ - f[256] = f[255] + f[-255]; - - /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */ -- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #-1016\\\]" } } */ -+ /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! 
arm_thumb2_ok } } } } } */ - /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */ - d[32] = d[127] + d[-127]; - } ---- a/src/gcc/testsuite/gcc.target/arm/wmul-10.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-10.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+unsigned long long -+foo (unsigned short a, unsigned short *b, unsigned short *c) -+{ -+ return (unsigned)a + (unsigned long long)*b * (unsigned long long)*c; -+} -+ -+/* { dg-final { scan-assembler "umlal" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-11.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-11.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+long long -+foo (int *b) -+{ -+ return 10 * (long long)*b; -+} -+ -+/* { dg-final { scan-assembler "smull" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-12.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-12.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+long long -+foo (int *b, int *c) -+{ -+ long long tmp = (long long)*b * *c; -+ return 10 + tmp; -+} -+ -+/* { dg-final { scan-assembler "smlal" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-13.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-13.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+long long -+foo (int *a, int *b) -+{ -+ return *a + (long long)*b * 10; -+} -+ -+/* { dg-final { scan-assembler "smlal" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-5.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-5.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+long long -+foo (long long a, char *b, char *c) -+{ -+ return a + *b * *c; -+} -+ -+/* { dg-final { scan-assembler "umlal" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-6.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-6.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 
-march=armv7-a" } */ -+ -+long long -+foo (long long a, unsigned char *b, signed char *c) -+{ -+ return a + (long long)*b * (long long)*c; -+} -+ -+/* { dg-final { scan-assembler "smlal" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-7.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-7.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+unsigned long long -+foo (unsigned long long a, unsigned char *b, unsigned short *c) -+{ -+ return a + *b * *c; -+} -+ -+/* { dg-final { scan-assembler "umlal" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-8.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-8.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+long long -+foo (long long a, int *b, int *c) -+{ -+ return a + (long long)*b * *c; -+} -+ -+/* { dg-final { scan-assembler "smlal" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-9.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-9.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+long long -+foo (long long a, short *b, char *c) -+{ -+ return a + *b * *c; -+} -+ -+/* { dg-final { scan-assembler "smlalbb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+struct bf -+{ -+ int a : 3; -+ int b : 15; -+ int c : 3; -+}; -+ -+long long -+foo (long long a, struct bf b, struct bf c) -+{ -+ return a + b.b * c.b; -+} -+ -+/* { dg-final { scan-assembler "smlalbb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv7-a" } */ -+ -+struct bf -+{ -+ int a : 3; -+ unsigned int b : 15; -+ int c : 3; -+}; -+ -+long long -+foo (long long a, struct bf b, struct bf c) -+{ -+ return a + b.b * c.c; -+} 
-+ -+/* { dg-final { scan-assembler "smlalbb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/xor-and.c -+++ b/src/gcc/testsuite/gcc.target/arm/xor-and.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -march=armv6" } */ -+ -+unsigned short foo (unsigned short x) -+{ -+ x ^= 0x4002; -+ x >>= 1; -+ x |= 0x8000; -+ return x; -+} -+ -+/* { dg-final { scan-assembler "orr" } } */ -+/* { dg-final { scan-assembler-not "mvn" } } */ -+/* { dg-final { scan-assembler-not "uxth" } } */ ---- a/src/gcc/testsuite/gcc.target/sparc/ultrasp12.c -+++ b/src/gcc/testsuite/gcc.target/sparc/ultrasp12.c -@@ -0,0 +1,64 @@ -+/* PR rtl-optimization/48830 */ -+/* Testcase by Hans-Peter Nilsson */ -+ -+/* { dg-do compile } */ -+/* { dg-require-effective-target lp64 } */ -+/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */ -+ -+typedef unsigned char uint8_t; -+typedef unsigned int uint32_t; -+typedef unsigned long int uint64_t; -+typedef unsigned long int uintmax_t; -+typedef unsigned char rc_vec_t __attribute__((__vector_size__(8))); -+typedef short rc_svec_type_ __attribute__((__vector_size__(8))); -+typedef unsigned char rc_vec4_type_ __attribute__((__vector_size__(4))); -+ -+void -+rc_stat_xsum_acc(const uint8_t *__restrict src1, int src1_dim, -+ const uint8_t *__restrict src2, int src2_dim, -+ int len, int height, uintmax_t sum[5]) -+{ -+ uint32_t s1 = 0; -+ uint32_t s2 = 0; -+ uintmax_t s11 = 0; -+ uintmax_t s22 = 0; -+ uintmax_t s12 = 0; -+ int full = len / ((1024) < (1024) ? (1024) : (1024)); -+ int rem = len % ((1024) < (1024) ? 
(1024) : (1024)); -+ int rem1 = rem / 1; -+ int y; -+ unsigned int rc_gsr_scale_ __attribute__ ((__unused__)) = 7; unsigned int rc_gsr_align_ __attribute__ ((__unused__)) = 4; unsigned int rc_gsr_set_ __attribute__ ((__unused__)) = 0; register unsigned int rc_gsr_fakedep_ __attribute__ ((__unused__)) = 0; unsigned int rc_gsr_ldinit_ __attribute__ ((__unused__)) = 0; -+ for (y = 0; y < height; y++) { -+ rc_vec_t a1, a2, a11, a22, a12; -+ int i1 = (y)*(src1_dim); -+ int i2 = (y)*(src2_dim); -+ int x; -+ ((a1) = ((rc_vec_t) {0})); -+ ((a2) = ((rc_vec_t) {0})); -+ ((a11) = ((rc_vec_t) {0})); -+ ((a22) = ((rc_vec_t) {0})); -+ ((a12) = ((rc_vec_t) {0})); -+ for (x = 0; x < full; x++) { -+ int k; -+ for (k = 0; k < ((1024) < (1024) ? (1024) : (1024)) / -+ 1; k++) -+ { -+ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = 
(s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = 
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, 
s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = 
__builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; 
})(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] 
"r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), 
(((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); -+ -+ } -+ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { 
rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); -+ } -+ for (x = 0; x < rem1; x++) { -+ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if 
(rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = 
(rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; 
rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; 
struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; 
})(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on 
fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = 
__builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); -+ } -+ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; 
struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); -+ -+ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; 
})(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); -+ } -+ sum[0] = s1; -+ sum[1] = s2; -+ sum[2] = s11; -+ sum[3] = s22; -+ sum[4] = s12; -+ ; -+} ---- a/src/gcc/testsuite/gfortran.dg/implicit_pure_1.f90 -+++ b/src/gcc/testsuite/gfortran.dg/implicit_pure_1.f90 -@@ -0,0 +1,53 @@ -+! { dg-do run } -+! -+! PR fortran/51218 -+! -+! Contributed by Harald Anlauf -+! -+ -+module a -+ implicit none -+ integer :: neval = 0 -+contains -+ subroutine inc_eval -+ neval = neval + 1 -+ end subroutine inc_eval -+end module a -+ -+module b -+ use a -+ implicit none -+contains -+ function f(x) ! Should be implicit pure -+ real :: f -+ real, intent(in) :: x -+ f = x -+ end function f -+ -+ function g(x) ! Should NOT be implicit pure -+ real :: g -+ real, intent(in) :: x -+ call inc_eval -+ g = x -+ end function g -+end module b -+ -+program gfcbug114a -+ use a -+ use b -+ implicit none -+ real :: x = 1, y = 1, t, u, v, w -+ if (neval /= 0) call abort () -+ t = f(x)*f(y) -+ if (neval /= 0) call abort () -+ u = f(x)*f(y) + f(x)*f(y) -+ if (neval /= 0) call abort () -+ v = g(x)*g(y) -+ if (neval /= 2) call abort () -+ w = g(x)*g(y) + g(x)*g(y) -+ if (neval /= 6) call abort () -+ if (t /= 1.0 .or. u /= 2.0 .or. v /= 1.0 .or. w /= 2) call abort () -+end program gfcbug114a -+ -+! { dg-final { scan-module "b" "IMPLICIT_PURE" } } -+! { dg-final { cleanup-modules "b" } } ---- a/src/gcc/testsuite/gfortran.dg/pr50875.f90 -+++ b/src/gcc/testsuite/gfortran.dg/pr50875.f90 -@@ -0,0 +1,39 @@ -+! { dg-do compile { target { i?86-*-* x86_64-*-* } } } -+! { dg-options "-O3 -mavx" } -+! -+! 
PR fortran/50875.f90 -+ -+module test -+ -+ implicit none -+ -+ integer, parameter :: dp=kind(1.d0) -+ -+ integer :: P = 2 -+ -+ real(kind=dp), allocatable :: real_array_A(:),real_array_B(:,:) -+ complex(kind=dp), allocatable :: cmplx_array_A(:) -+ -+contains -+ -+ subroutine routine_A -+ -+ integer :: i -+ -+ allocate(cmplx_array_A(P),real_array_B(P,P),real_array_A(P)) -+ -+ real_array_A = 1 -+ real_array_B = 1 -+ -+ do i = 1, p -+ cmplx_array_A = cmplx(real_array_B(:,i),0.0_dp,dp) -+ cmplx_array_A = cmplx_array_A * exp(cmplx(0.0_dp,real_array_A+1)) -+ end do -+ -+ deallocate(cmplx_array_A,real_array_B,real_array_A) -+ -+ end subroutine routine_A -+ -+end module test -+ -+! { dg-final { cleanup-modules "test" } } ---- a/src/gcc/testsuite/gfortran.dg/vect/pr19049.f90 -+++ b/src/gcc/testsuite/gfortran.dg/vect/pr19049.f90 -@@ -19,6 +19,7 @@ - end + vst2q_lane_u32 (arg0_uint32_t, arg1_uint32x4x2_t, 1); + } - ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } --! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } } -+! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } } -+! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } } - ! 
{ dg-final { cleanup-tree-dump "vect" } } +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c +@@ -16,6 +16,6 @@ + vst2q_p16 (arg0_poly16_t, arg1_poly16x8x2_t); + } ---- a/src/gcc/testsuite/gfortran.dg/whole_file_35.f90 -+++ b/src/gcc/testsuite/gfortran.dg/whole_file_35.f90 -@@ -0,0 +1,28 @@ -+! { dg-do compile } -+! -+! PR fortran/50408 -+! -+! Contributed by Vittorio Zecca -+! -+ module m -+ type int -+ integer :: val -+ end type int -+ interface ichar -+ module procedure uch -+ end interface -+ contains -+ function uch (c) -+ character (len=1), intent (in) :: c -+ type (int) :: uch -+ intrinsic ichar -+ uch%val = 127 - ichar (c) -+ end function uch -+ end module m -+ -+ program p -+ use m -+ print *,ichar('~') ! must print "1" -+ end program p -+ -+! 
{ dg-final { cleanup-modules "m" } } ---- a/src/gcc/testsuite/lib/target-supports.exp -+++ b/src/gcc/testsuite/lib/target-supports.exp -@@ -1894,6 +1894,18 @@ - }] +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c +@@ -16,6 +16,6 @@ + vst2q_p8 (arg0_poly8_t, arg1_poly8x16x2_t); } -+# Return 1 if this is an ARM target that supports unaligned word/halfword -+# load/store instructions. -+ -+proc check_effective_target_arm_unaligned { } { -+ return [check_no_compiler_messages arm_unaligned assembly { -+ #ifndef __ARM_FEATURE_UNALIGNED -+ #error no unaligned support -+ #endif -+ int i; -+ }] -+} -+ - # Add the options needed for NEON. We need either -mfloat-abi=softfp - # or -mfloat-abi=hard, but if one is already specified by the - # multilib, use it. 
Similarly, if a -mfpu option already enables -@@ -1988,6 +2000,47 @@ - check_effective_target_arm_fp16_ok_nocache] +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c +@@ -16,6 +16,6 @@ + vst2q_s16 (arg0_int16_t, arg1_int16x8x2_t); } -+# Creates a series of routines that return 1 if the given architecture -+# can be selected and a routine to give the flags to select that architecture -+# Note: Extra flags may be added to disable options from newer compilers -+# (Thumb in particular - but others may be added in the future) -+# Usage: /* { dg-require-effective-target arm_arch_v5_ok } */ -+# /* { dg-add-options arm_arch_v5 } */ -+foreach { armfunc armflag armdef } { v5 "-march=armv5 -marm" __ARM_ARCH_5__ -+ v6 "-march=armv6" __ARM_ARCH_6__ -+ v6k "-march=armv6k" __ARM_ARCH_6K__ -+ v7a "-march=armv7-a" __ARM_ARCH_7A__ } { -+ eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { -+ proc check_effective_target_arm_arch_FUNC_ok { } { -+ if { [ string match "*-marm*" "FLAG" ] && -+ ![check_effective_target_arm_arm_ok] } { -+ return 0 -+ } -+ return [check_no_compiler_messages arm_arch_FUNC_ok assembly { 
-+ #if !defined (DEF) -+ #error FOO -+ #endif -+ } "FLAG" ] -+ } -+ -+ proc add_options_for_arm_arch_FUNC { flags } { -+ return "$flags FLAG" -+ } -+ }] -+} -+ -+# Return 1 if this is an ARM target where -marm causes ARM to be -+# used (not Thumb) -+ -+proc check_effective_target_arm_arm_ok { } { -+ return [check_no_compiler_messages arm_arm_ok assembly { -+ #if !defined (__arm__) || defined (__thumb__) || defined (__thumb2__) -+ #error FOO -+ #endif -+ } "-marm"] -+} -+ -+ - # Return 1 is this is an ARM target where -mthumb causes Thumb-1 to be - # used. +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c +@@ -16,6 +16,6 @@ + vst2q_s32 (arg0_int32_t, arg1_int32x4x2_t); + } -@@ -2338,6 +2391,26 @@ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler 
"vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c +@@ -16,6 +16,6 @@ + vst2q_s8 (arg0_int8_t, arg1_int8x16x2_t); } +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c +@@ -16,6 +16,6 @@ + vst2q_u16 (arg0_uint16_t, arg1_uint16x8x2_t); + } -+# Return 1 if the target supports hardware vector shift operation for char. 
-+ -+proc check_effective_target_vect_shift_char { } { -+ global et_vect_shift_char_saved -+ -+ if [info exists et_vect_shift_char_saved] { -+ verbose "check_effective_target_vect_shift_char: using cached result" 2 -+ } else { -+ set et_vect_shift_char_saved 0 -+ if { ([istarget powerpc*-*-*] -+ && ![istarget powerpc-*-linux*paired*]) -+ || [check_effective_target_arm32] } { -+ set et_vect_shift_char_saved 1 -+ } -+ } -+ -+ verbose "check_effective_target_vect_shift_char: returning $et_vect_shift_char_saved" 2 -+ return $et_vect_shift_char_saved -+} -+ - # Return 1 if the target supports hardware vectors of long, 0 otherwise. - # - # This can change for different subtargets so do not cache the result. -@@ -2673,7 +2746,8 @@ - } else { - set et_vect_widen_mult_qi_to_hi_saved 0 - } -- if { [istarget powerpc*-*-*] } { -+ if { [istarget powerpc*-*-*] -+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { - set et_vect_widen_mult_qi_to_hi_saved 1 - } - } -@@ -2706,7 +2780,8 @@ - || [istarget spu-*-*] - || [istarget ia64-*-*] - || [istarget i?86-*-*] -- || [istarget x86_64-*-*] } { -+ || [istarget x86_64-*-*] -+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { - set et_vect_widen_mult_hi_to_si_saved 1 - } - } -@@ -2715,6 +2790,72 @@ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c +@@ -16,6 +16,6 @@ + vst2q_u32 (arg0_uint32_t, arg1_uint32x4x2_t); } - # Return 1 if the target plus current options supports a vector -+# widening multiplication of *char* args into *short* result, 0 otherwise. -+# -+# This won't change for different subtargets so cache the result. -+ -+proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } { -+ global et_vect_widen_mult_qi_to_hi_pattern -+ -+ if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] { -+ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2 -+ } else { -+ set et_vect_widen_mult_qi_to_hi_pattern_saved 0 -+ if { [istarget powerpc*-*-*] -+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { -+ set et_vect_widen_mult_qi_to_hi_pattern_saved 1 -+ } -+ } -+ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2 -+ return $et_vect_widen_mult_qi_to_hi_pattern_saved -+} -+ -+# Return 1 if the target plus current options supports a vector -+# widening multiplication of *short* args into *int* result, 0 otherwise. -+# -+# This won't change for different subtargets so cache the result. 
-+ -+proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } { -+ global et_vect_widen_mult_hi_to_si_pattern -+ -+ if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] { -+ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2 -+ } else { -+ set et_vect_widen_mult_hi_to_si_pattern_saved 0 -+ if { [istarget powerpc*-*-*] -+ || [istarget spu-*-*] -+ || [istarget ia64-*-*] -+ || [istarget i?86-*-*] -+ || [istarget x86_64-*-*] -+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { -+ set et_vect_widen_mult_hi_to_si_pattern_saved 1 -+ } -+ } -+ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2 -+ return $et_vect_widen_mult_hi_to_si_pattern_saved -+} -+ -+# Return 1 if the target plus current options supports a vector -+# widening shift, 0 otherwise. -+# -+# This won't change for different subtargets so cache the result. -+ -+proc check_effective_target_vect_widen_shift { } { -+ global et_vect_widen_shift_saved -+ -+ if [info exists et_vect_shift_saved] { -+ verbose "check_effective_target_vect_widen_shift: using cached result" 2 -+ } else { -+ set et_vect_widen_shift_saved 0 -+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { -+ set et_vect_widen_shift_saved 1 -+ } -+ } -+ verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2 -+ return $et_vect_widen_shift_saved -+} -+ -+# Return 1 if the target plus current options supports a vector - # dot-product of signed chars, 0 otherwise. - # - # This won't change for different subtargets so cache the result. 
-@@ -3050,7 +3191,8 @@ - || [istarget ia64-*-*] - || [istarget i?86-*-*] - || [istarget spu-*-*] -- || [istarget x86_64-*-*] } { -+ || [istarget x86_64-*-*] -+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { - set et_vect_cond_saved 1 - } - } -@@ -3149,29 +3291,6 @@ - return $et_vect_extract_even_odd_saved +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c +@@ -16,6 +16,6 @@ + vst2q_u8 (arg0_uint8_t, arg1_uint8x16x2_t); } --# Return 1 if the target supports vector even/odd elements extraction of --# vectors with SImode elements or larger, 0 otherwise. 
-- --proc check_effective_target_vect_extract_even_odd_wide { } { -- global et_vect_extract_even_odd_wide_saved -- -- if [info exists et_vect_extract_even_odd_wide_saved] { -- verbose "check_effective_target_vect_extract_even_odd_wide: using cached result" 2 -- } else { -- set et_vect_extract_even_odd_wide_saved 0 -- if { [istarget powerpc*-*-*] -- || [istarget i?86-*-*] -- || [istarget x86_64-*-*] -- || [istarget ia64-*-*] -- || [istarget spu-*-*] } { -- set et_vect_extract_even_odd_wide_saved 1 -- } -- } -- -- verbose "check_effective_target_vect_extract_even_wide_odd: returning $et_vect_extract_even_odd_wide_saved" 2 -- return $et_vect_extract_even_odd_wide_saved --} -- - # Return 1 if the target supports vector interleaving, 0 otherwise. +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s16.c +@@ -16,5 +16,5 @@ + vst2_s16 (arg0_int16_t, arg1_int16x4x2_t); + } - proc check_effective_target_vect_interleave { } { -@@ -3194,41 +3313,66 @@ - return $et_vect_interleave_saved +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s32.c +@@ -16,5 +16,5 @@ + vst2_s32 (arg0_int32_t, arg1_int32x2x2_t); } --# Return 1 if the target supports vector interleaving and extract even/odd, 0 otherwise. --proc check_effective_target_vect_strided { } { -- global et_vect_strided_saved -+foreach N {2 3 4 8} { -+ eval [string map [list N $N] { -+ # Return 1 if the target supports 2-vector interleaving -+ proc check_effective_target_vect_stridedN { } { -+ global et_vect_stridedN_saved +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s64.c +@@ -16,5 +16,5 @@ + vst2_s64 (arg0_int64_t, arg1_int64x1x2_t); + } -- if [info exists et_vect_strided_saved] { -- verbose "check_effective_target_vect_strided: using cached result" 2 -+ if [info exists et_vect_stridedN_saved] { -+ verbose "check_effective_target_vect_stridedN: using cached result" 2 -+ } else { -+ set et_vect_stridedN_saved 0 -+ if { (N & -N) == N -+ && [check_effective_target_vect_interleave] -+ && [check_effective_target_vect_extract_even_odd] } { -+ set et_vect_stridedN_saved 1 -+ } -+ if { [istarget arm*-*-*] && N >= 2 && N <= 4 } { -+ set et_vect_stridedN_saved 1 -+ } -+ } 
-+ -+ verbose "check_effective_target_vect_stridedN: returning $et_vect_stridedN_saved" 2 -+ return $et_vect_stridedN_saved -+ } -+ }] -+} -+ -+# Return 1 if the target supports multiple vector sizes -+ -+proc check_effective_target_vect_multiple_sizes { } { -+ global et_vect_multiple_sizes_saved -+ -+ if [info exists et_vect_multiple_sizes_saved] { -+ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 - } else { -- set et_vect_strided_saved 0 -- if { [check_effective_target_vect_interleave] -- && [check_effective_target_vect_extract_even_odd] } { -- set et_vect_strided_saved 1 -+ set et_vect_multiple_sizes_saved 0 -+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { -+ set et_vect_multiple_sizes_saved 1 - } - } +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s8.c +@@ -16,5 +16,5 @@ + vst2_s8 (arg0_int8_t, arg1_int8x8x2_t); + } -- verbose "check_effective_target_vect_strided: returning $et_vect_strided_saved" 2 -- return $et_vect_strided_saved -+ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2 -+ return $et_vect_multiple_sizes_saved +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u16.c +@@ -16,5 +16,5 @@ + vst2_u16 (arg0_uint16_t, arg1_uint16x4x2_t); } --# Return 1 if the target supports vector interleaving and extract even/odd --# for wide element types, 0 otherwise. --proc check_effective_target_vect_strided_wide { } { -- global et_vect_strided_wide_saved -+# Return 1 if the target supports vectors of 64 bits. +-/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u32.c +@@ -16,5 +16,5 @@ + vst2_u32 (arg0_uint32_t, arg1_uint32x2x2_t); + } -- if [info exists et_vect_strided_wide_saved] { -- verbose "check_effective_target_vect_strided_wide: using cached result" 2 -+proc check_effective_target_vect64 { } { -+ global et_vect64_saved -+ -+ if [info exists et_vect64_saved] { -+ verbose "check_effective_target_vect64: using cached result" 2 - } else { -- set et_vect_strided_wide_saved 0 -- if { [check_effective_target_vect_interleave] -- && [check_effective_target_vect_extract_even_odd_wide] } { -- set et_vect_strided_wide_saved 1 -+ set et_vect64_saved 0 -+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { -+ set et_vect64_saved 1 - } - } +-/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u64.c +@@ -16,5 +16,5 @@ + vst2_u64 (arg0_uint64_t, arg1_uint64x1x2_t); + } -- verbose "check_effective_target_vect_strided_wide: returning $et_vect_strided_wide_saved" 2 -- return $et_vect_strided_wide_saved -+ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2 -+ return $et_vect64_saved +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u8.c +@@ -16,5 +16,5 @@ + vst2_u8 (arg0_uint8_t, arg1_uint8x8x2_t); } - # Return 1 if the target supports section-anchors -@@ -3281,6 +3425,31 @@ - return $et_sync_int_long_saved +-/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3f32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3f32.c +@@ -16,5 +16,5 @@ + vst3_f32 (arg0_float32_t, arg1_float32x2x3_t); } -+# Return 1 if the target supports atomic operations on "long 
long" and can -+# execute them -+# So far only put checks in for ARM, others may want to add their own -+proc check_effective_target_sync_longlong { } { -+ return [check_runtime sync_longlong_runtime { -+ #include -+ int main () -+ { -+ long long l1; -+ -+ if (sizeof (long long) != 8) -+ exit (1); -+ -+ #ifdef __arm__ -+ /* Just check for native; checking for kernel fallback is tricky. */ -+ asm volatile ("ldrexd r0,r1, [%0]" : : "r" (&l1) : "r0", "r1"); -+ #else -+ # error "Add other suitable archs here" -+ #endif -+ -+ exit (0); -+ } -+ } "" ] -+} -+ - # Return 1 if the target supports atomic operations on "char" and "short". +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c +@@ -16,5 +16,5 @@ + vst3_lane_f32 (arg0_float32_t, arg1_float32x2x3_t, 1); + } - proc check_effective_target_sync_char_short { } { -@@ -3614,11 +3783,11 @@ - return $flags +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } 
*/ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c +@@ -16,5 +16,5 @@ + vst3_lane_p16 (arg0_poly16_t, arg1_poly16x4x3_t, 1); } --# Add to FLAGS the flags needed to enable 128-bit vectors. -+# Add to FLAGS the flags needed to enable 64-bit vectors. +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c +@@ -16,5 +16,5 @@ + vst3_lane_p8 (arg0_poly8_t, arg1_poly8x8x3_t, 1); + } --proc add_options_for_quad_vectors { flags } { -+proc add_options_for_double_vectors { flags } { - if [is-effective-target arm_neon_ok] { -- return "$flags -mvectorize-with-neon-quad" -+ return "$flags -mvectorize-with-neon-double" - } +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* 
{ dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c +@@ -16,5 +16,5 @@ + vst3_lane_s16 (arg0_int16_t, arg1_int16x4x3_t, 1); + } - return $flags ---- a/src/gcc/tree-cfg.c -+++ b/src/gcc/tree-cfg.c -@@ -3046,7 +3046,26 @@ - tree fntype; - unsigned i; +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c +@@ -16,5 +16,5 @@ + vst3_lane_s32 (arg0_int32_t, arg1_int32x2x3_t, 1); + } -- if (TREE_CODE (fn) != OBJ_TYPE_REF -+ if (gimple_call_internal_p (stmt)) -+ { -+ if (fn) -+ { -+ error ("gimple call has two targets"); -+ debug_generic_stmt (fn); -+ return true; -+ } -+ } -+ else -+ { -+ if (!fn) -+ { -+ error ("gimple call has no target"); -+ return true; -+ } -+ } -+ -+ if (fn -+ && TREE_CODE (fn) != OBJ_TYPE_REF - && !is_gimple_val (fn)) - { - error ("invalid function in gimple call"); -@@ -3054,9 +3073,10 @@ - return true; - } +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c +@@ -16,5 +16,5 @@ + vst3_lane_s8 (arg0_int8_t, arg1_int8x8x3_t, 1); + } -- if (!POINTER_TYPE_P (TREE_TYPE (fn)) -- || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE -- && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE)) -+ if (fn -+ && (!POINTER_TYPE_P (TREE_TYPE (fn)) -+ || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE -+ && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE))) - { - error ("non-function in gimple call"); - return true; -@@ -3076,8 +3096,12 @@ - return true; - } +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c +@@ -16,5 +16,5 @@ + vst3_lane_u16 (arg0_uint16_t, arg1_uint16x4x3_t, 1); + } -- fntype = TREE_TYPE (TREE_TYPE (fn)); -- if (gimple_call_lhs (stmt) -+ if (fn) -+ fntype = TREE_TYPE (TREE_TYPE (fn)); -+ else -+ fntype = NULL_TREE; -+ if (fntype -+ && gimple_call_lhs (stmt) - && !useless_type_conversion_p (TREE_TYPE 
(gimple_call_lhs (stmt)), - TREE_TYPE (fntype)) - /* ??? At least C++ misses conversions at assignments from -@@ -3449,6 +3473,44 @@ - return false; - } +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c +@@ -16,5 +16,5 @@ + vst3_lane_u32 (arg0_uint32_t, arg1_uint32x2x3_t, 1); + } -+ case WIDEN_LSHIFT_EXPR: -+ { -+ if (!INTEGRAL_TYPE_P (lhs_type) -+ || !INTEGRAL_TYPE_P (rhs1_type) -+ || TREE_CODE (rhs2) != INTEGER_CST -+ || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))) -+ { -+ error ("type mismatch in widening vector shift expression"); -+ debug_generic_expr (lhs_type); -+ debug_generic_expr (rhs1_type); -+ debug_generic_expr (rhs2_type); -+ return true; -+ } -+ -+ return false; -+ } -+ -+ case VEC_WIDEN_LSHIFT_HI_EXPR: -+ case VEC_WIDEN_LSHIFT_LO_EXPR: -+ { -+ if (TREE_CODE (rhs1_type) != VECTOR_TYPE -+ || TREE_CODE (lhs_type) != VECTOR_TYPE -+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) -+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) -+ || TREE_CODE (rhs2) != INTEGER_CST -+ || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type)) -+ > TYPE_PRECISION (TREE_TYPE (lhs_type)))) -+ { -+ error ("type mismatch in widening vector shift expression"); -+ debug_generic_expr (lhs_type); -+ debug_generic_expr (rhs1_type); -+ debug_generic_expr (rhs2_type); -+ return true; -+ } -+ -+ return 
false; -+ } -+ - case PLUS_EXPR: - case MINUS_EXPR: - { -@@ -3550,7 +3612,7 @@ - case WIDEN_MULT_EXPR: - if (TREE_CODE (lhs_type) != INTEGER_TYPE) - return true; -- return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)) -+ return ((2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)) - || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))); +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c +@@ -16,5 +16,5 @@ + vst3_lane_u8 (arg0_uint8_t, arg1_uint8x8x3_t, 1); + } - case WIDEN_SUM_EXPR: -@@ -3643,7 +3705,7 @@ - && !FIXED_POINT_TYPE_P (rhs1_type)) - || !useless_type_conversion_p (rhs1_type, rhs2_type) - || !useless_type_conversion_p (lhs_type, rhs3_type) -- || 2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type) -+ || 2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type) - || TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)) - { - error ("type mismatch in widening multiply-accumulate expression"); -@@ -4130,9 +4192,10 @@ - didn't see a function declaration before the call. 
*/ - if (is_gimple_call (stmt)) - { -- tree decl; -+ tree fn, decl; +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3p16.c +@@ -16,5 +16,5 @@ + vst3_p16 (arg0_poly16_t, arg1_poly16x4x3_t); + } -- if (!is_gimple_call_addr (gimple_call_fn (stmt))) -+ fn = gimple_call_fn (stmt); -+ if (fn && !is_gimple_call_addr (fn)) - { - error ("invalid function in call statement"); - return true; -@@ -7503,6 +7566,8 @@ - case GIMPLE_CALL: - if (gimple_call_lhs (g)) - break; -+ if (gimple_call_internal_p (g)) -+ break; +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3p8.c +@@ -16,5 +16,5 @@ + vst3_p8 (arg0_poly8_t, arg1_poly8x8x3_t); + } - /* This is a naked call, as opposed to a GIMPLE_CALL with an - LHS. 
All calls whose value is ignored should be ---- a/src/gcc/tree-data-ref.c -+++ b/src/gcc/tree-data-ref.c -@@ -721,11 +721,11 @@ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c +@@ -16,6 +16,6 @@ + vst3q_f32 (arg0_float32_t, arg1_float32x4x3_t); } - /* Analyzes the behavior of the memory reference DR in the innermost loop or -- basic block that contains it. Returns true if analysis succeed or false -+ basic block that contains it. Returns true if analysis succeed or false - otherwise. 
*/ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c +@@ -16,5 +16,5 @@ + vst3q_lane_f32 (arg0_float32_t, arg1_float32x4x3_t, 1); + } - bool --dr_analyze_innermost (struct data_reference *dr) -+dr_analyze_innermost (struct data_reference *dr, struct loop *nest) - { - gimple stmt = DR_STMT (dr); - struct loop *loop = loop_containing_stmt (stmt); -@@ -768,14 +768,25 @@ - } - else - base = build_fold_addr_expr (base); -+ - if (in_loop) - { - if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, - false)) - { -- if (dump_file && (dump_flags & TDF_DETAILS)) -- fprintf (dump_file, "failed: evolution of base is not affine.\n"); -- return false; -+ if (nest) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "failed: evolution of base is not" -+ " affine.\n"); -+ return false; -+ } -+ else -+ { -+ base_iv.base = base; -+ base_iv.step = ssize_int (0); -+ base_iv.no_overflow = true; -+ } - } - } - else -@@ -800,10 +811,18 @@ - else if (!simple_iv (loop, loop_containing_stmt (stmt), - poffset, 
&offset_iv, false)) - { -- if (dump_file && (dump_flags & TDF_DETAILS)) -- fprintf (dump_file, "failed: evolution of offset is not" -- " affine.\n"); -- return false; -+ if (nest) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "failed: evolution of offset is not" -+ " affine.\n"); -+ return false; -+ } -+ else -+ { -+ offset_iv.base = poffset; -+ offset_iv.step = ssize_int (0); -+ } - } - } +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c +@@ -16,5 +16,5 @@ + vst3q_lane_p16 (arg0_poly16_t, arg1_poly16x8x3_t, 1); + } -@@ -967,7 +986,7 @@ - DR_REF (dr) = memref; - DR_IS_READ (dr) = is_read; +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- 
a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c +@@ -16,5 +16,5 @@ + vst3q_lane_s16 (arg0_int16_t, arg1_int16x8x3_t, 1); + } -- dr_analyze_innermost (dr); -+ dr_analyze_innermost (dr, nest); - dr_analyze_indices (dr, nest, loop); - dr_analyze_alias (dr); +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c +@@ -16,5 +16,5 @@ + vst3q_lane_s32 (arg0_int32_t, arg1_int32x4x3_t, 1); + } -@@ -991,6 +1010,48 @@ - return dr; +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c +@@ -16,5 +16,5 @@ + vst3q_lane_u16 (arg0_uint16_t, arg1_uint16x8x3_t, 
1); } -+/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical -+ expressions. */ -+static bool -+dr_equal_offsets_p1 (tree offset1, tree offset2) -+{ -+ bool res; -+ -+ STRIP_NOPS (offset1); -+ STRIP_NOPS (offset2); -+ -+ if (offset1 == offset2) -+ return true; -+ -+ if (TREE_CODE (offset1) != TREE_CODE (offset2) -+ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) -+ return false; -+ -+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), -+ TREE_OPERAND (offset2, 0)); -+ -+ if (!res || !BINARY_CLASS_P (offset1)) -+ return res; -+ -+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), -+ TREE_OPERAND (offset2, 1)); -+ -+ return res; -+} -+ -+/* Check if DRA and DRB have equal offsets. */ -+bool -+dr_equal_offsets_p (struct data_reference *dra, -+ struct data_reference *drb) -+{ -+ tree offset1, offset2; -+ -+ offset1 = DR_OFFSET (dra); -+ offset2 = DR_OFFSET (drb); -+ -+ return dr_equal_offsets_p1 (offset1, offset2); -+} -+ - /* Returns true if FNA == FNB. */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c +@@ -16,5 +16,5 @@ + vst3q_lane_u32 (arg0_uint32_t, arg1_uint32x4x3_t, 1); + } - static bool -@@ -4294,7 +4355,7 @@ - DATAREFS. Returns chrec_dont_know when failing to analyze a - difficult case, returns NULL_TREE otherwise. 
*/ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c +@@ -16,6 +16,6 @@ + vst3q_p16 (arg0_poly16_t, arg1_poly16x8x3_t); + } --static tree -+tree - find_data_references_in_bb (struct loop *loop, basic_block bb, - VEC (data_reference_p, heap) **datarefs) - { -@@ -5143,7 +5204,7 @@ - DR_STMT (dr) = stmt; - DR_REF (dr) = op0; +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c 
+@@ -16,6 +16,6 @@ + vst3q_p8 (arg0_poly8_t, arg1_poly8x16x3_t); + } -- res = dr_analyze_innermost (dr) -+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)) - && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c +@@ -16,6 +16,6 @@ + vst3q_s16 (arg0_int16_t, arg1_int16x8x3_t); + } - free_data_ref (dr); -@@ -5183,7 +5244,7 @@ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c +@@ -16,6 +16,6 @@ + vst3q_s32 (arg0_int32_t, arg1_int32x4x3_t); + } - DR_STMT (dr) = stmt; - DR_REF (dr) = *ref->pos; -- dr_analyze_innermost (dr); -+ dr_analyze_innermost (dr, loop_containing_stmt (stmt)); - base_address = DR_BASE_ADDRESS (dr); +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c +@@ -16,6 +16,6 @@ + vst3q_s8 (arg0_int8_t, arg1_int8x16x3_t); + } - if (!base_address) ---- a/src/gcc/tree-data-ref.h -+++ b/src/gcc/tree-data-ref.h -@@ -386,7 +386,7 @@ - DEF_VEC_ALLOC_O (data_ref_loc, heap); +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { 
scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c +@@ -16,6 +16,6 @@ + vst3q_u16 (arg0_uint16_t, arg1_uint16x8x3_t); + } - bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **); --bool dr_analyze_innermost (struct data_reference *); -+bool dr_analyze_innermost (struct data_reference *, struct loop *); - extern bool compute_data_dependences_for_loop (struct loop *, bool, - VEC (loop_p, heap) **, - VEC (data_reference_p, heap) **, -@@ -426,10 +426,14 @@ - extern void compute_all_dependences (VEC (data_reference_p, heap) *, - VEC (ddr_p, heap) **, VEC (loop_p, heap) *, - bool); -+extern tree find_data_references_in_bb (struct loop *, basic_block, -+ VEC (data_reference_p, heap) **); +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c +@@ -16,6 +16,6 @@ + vst3q_u32 (arg0_uint32_t, arg1_uint32x4x3_t); + } - extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *); - extern bool dr_may_alias_p (const struct data_reference *, - const struct data_reference *); -+extern bool dr_equal_offsets_p (struct data_reference *, -+ struct data_reference *); +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c +@@ -16,6 +16,6 @@ + vst3q_u8 (arg0_uint8_t, arg1_uint8x16x3_t); + } +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s16.c +@@ -16,5 +16,5 @@ + vst3_s16 (arg0_int16_t, arg1_int16x4x3_t); + } - /* Return true when the base objects of data references A and B are ---- a/src/gcc/tree-eh.c -+++ b/src/gcc/tree-eh.c -@@ -2752,7 +2752,7 @@ - || gimple_call_lhs (twos) - || gimple_call_chain (ones) - || gimple_call_chain (twos) -- || !operand_equal_p (gimple_call_fn (ones), gimple_call_fn (twos), 0) -+ || !gimple_call_same_target_p (ones, twos) - || gimple_call_num_args (ones) != gimple_call_num_args (twos)) - return false; +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s32.c +@@ -16,5 +16,5 @@ + vst3_s32 (arg0_int32_t, 
arg1_int32x2x3_t); + } ---- a/src/gcc/tree-if-conv.c -+++ b/src/gcc/tree-if-conv.c -@@ -464,8 +464,8 @@ - /* Returns true when the memory references of STMT are read or written - unconditionally. In other words, this function returns true when - for every data reference A in STMT there exist other accesses to -- the same data reference with predicates that add up (OR-up) to the -- true predicate: this ensures that the data reference A is touched -+ a data reference with the same base with predicates that add up (OR-up) to -+ the true predicate: this ensures that the data reference A is touched - (read or written) on every iteration of the if-converted loop. */ +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s64.c +@@ -16,5 +16,5 @@ + vst3_s64 (arg0_int64_t, arg1_int64x1x3_t); + } - static bool -@@ -489,21 +489,38 @@ - continue; +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s8.c +@@ 
-16,5 +16,5 @@ + vst3_s8 (arg0_int8_t, arg1_int8x8x3_t); + } - for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++) -- if (DR_STMT (b) != stmt -- && same_data_refs (a, b)) -- { -- tree cb = bb_predicate (gimple_bb (DR_STMT (b))); -- -- if (DR_RW_UNCONDITIONALLY (b) == 1 -- || is_true_predicate (cb) -- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb), -- ca, cb))) -- { -- DR_RW_UNCONDITIONALLY (a) = 1; -- DR_RW_UNCONDITIONALLY (b) = 1; -- found = true; -- break; -- } -+ { -+ tree ref_base_a = DR_REF (a); -+ tree ref_base_b = DR_REF (b); -+ -+ if (DR_STMT (b) == stmt) -+ continue; -+ -+ while (TREE_CODE (ref_base_a) == COMPONENT_REF -+ || TREE_CODE (ref_base_a) == IMAGPART_EXPR -+ || TREE_CODE (ref_base_a) == REALPART_EXPR) -+ ref_base_a = TREE_OPERAND (ref_base_a, 0); -+ -+ while (TREE_CODE (ref_base_b) == COMPONENT_REF -+ || TREE_CODE (ref_base_b) == IMAGPART_EXPR -+ || TREE_CODE (ref_base_b) == REALPART_EXPR) -+ ref_base_b = TREE_OPERAND (ref_base_b, 0); -+ -+ if (!operand_equal_p (ref_base_a, ref_base_b, 0)) -+ { -+ tree cb = bb_predicate (gimple_bb (DR_STMT (b))); -+ -+ if (DR_RW_UNCONDITIONALLY (b) == 1 -+ || is_true_predicate (cb) -+ || is_true_predicate (ca -+ = fold_or_predicates (EXPR_LOCATION (cb), ca, cb))) -+ { -+ DR_RW_UNCONDITIONALLY (a) = 1; -+ DR_RW_UNCONDITIONALLY (b) = 1; -+ found = true; -+ break; -+ } -+ } - } +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u16.c +@@ -16,5 +16,5 @@ + 
vst3_u16 (arg0_uint16_t, arg1_uint16x4x3_t); + } - if (!found) ---- a/src/gcc/tree-inline.c -+++ b/src/gcc/tree-inline.c -@@ -3343,6 +3343,7 @@ - case DOT_PROD_EXPR: - case WIDEN_MULT_PLUS_EXPR: - case WIDEN_MULT_MINUS_EXPR: -+ case WIDEN_LSHIFT_EXPR: +-/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u32.c +@@ -16,5 +16,5 @@ + vst3_u32 (arg0_uint32_t, arg1_uint32x2x3_t); + } - case VEC_WIDEN_MULT_HI_EXPR: - case VEC_WIDEN_MULT_LO_EXPR: -@@ -3357,6 +3358,8 @@ - case VEC_EXTRACT_ODD_EXPR: - case VEC_INTERLEAVE_HIGH_EXPR: - case VEC_INTERLEAVE_LOW_EXPR: -+ case VEC_WIDEN_LSHIFT_HI_EXPR: -+ case VEC_WIDEN_LSHIFT_LO_EXPR: +-/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u64.c +@@ -16,5 +16,5 @@ + vst3_u64 (arg0_uint64_t, arg1_uint64x1x3_t); + } - return 1; +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, 
\[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u8.c +@@ -16,5 +16,5 @@ + vst3_u8 (arg0_uint8_t, arg1_uint8x8x3_t); + } -@@ -3474,10 +3477,13 @@ - { - tree decl = gimple_call_fndecl (stmt); - tree addr = gimple_call_fn (stmt); -- tree funtype = TREE_TYPE (addr); -+ tree funtype = NULL_TREE; - bool stdarg = false; +-/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4f32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4f32.c +@@ -16,5 +16,5 @@ + vst4_f32 (arg0_float32_t, arg1_float32x2x4_t); + } -- if (POINTER_TYPE_P (funtype)) -+ if (addr) -+ funtype = TREE_TYPE (addr); -+ -+ if (funtype && POINTER_TYPE_P (funtype)) - funtype = TREE_TYPE (funtype); +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 
\]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c +@@ -16,5 +16,5 @@ + vst4_lane_f32 (arg0_float32_t, arg1_float32x2x4_t, 1); + } - /* Do not special case builtins where we see the body. -@@ -3517,7 +3523,7 @@ - if (decl) - funtype = TREE_TYPE (decl); +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c +@@ -16,5 +16,5 @@ + vst4_lane_p16 (arg0_poly16_t, arg1_poly16x4x4_t, 1); + } -- if (!VOID_TYPE_P (TREE_TYPE (funtype))) -+ if (funtype && !VOID_TYPE_P (TREE_TYPE (funtype))) - cost += estimate_move_cost (TREE_TYPE (funtype)); +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c +@@ -16,5 +16,5 @@ + vst4_lane_p8 (arg0_poly8_t, arg1_poly8x8x4_t, 1); + } - if (funtype) ---- a/src/gcc/tree-loop-distribution.c -+++ b/src/gcc/tree-loop-distribution.c -@@ -267,7 +267,7 @@ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c +@@ -16,5 +16,5 @@ + vst4_lane_s16 (arg0_int16_t, arg1_int16x4x4_t, 1); + } - DR_STMT (dr) = stmt; - DR_REF (dr) = op0; -- res = dr_analyze_innermost (dr); -+ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)); - gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c +@@ -16,5 +16,5 @@ + vst4_lane_s32 (arg0_int32_t, arg1_int32x2x4_t, 1); + } - nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); ---- a/src/gcc/tree-object-size.c -+++ b/src/gcc/tree-object-size.c -@@ -175,7 +175,7 @@ - unsigned HOST_WIDE_INT sz; - - if (!osi || (object_size_type & 1) != 0 -- || TREE_CODE (pt_var) != SSA_NAME) -+ || TREE_CODE (TREE_OPERAND (pt_var, 0)) != SSA_NAME) - { - sz = compute_builtin_object_size (TREE_OPERAND (pt_var, 0), - object_size_type & ~1); ---- a/src/gcc/tree-predcom.c -+++ b/src/gcc/tree-predcom.c -@@ -1114,7 +1114,7 @@ - memset (&init_dr, 0, sizeof (struct data_reference)); - DR_REF (&init_dr) = init_ref; - DR_STMT (&init_dr) = phi; -- if (!dr_analyze_innermost (&init_dr)) -+ if (!dr_analyze_innermost (&init_dr, loop)) - return NULL; +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c +@@ -16,5 +16,5 @@ + vst4_lane_s8 (arg0_int8_t, 
arg1_int8x8x4_t, 1); + } - if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) ---- a/src/gcc/tree-pretty-print.c -+++ b/src/gcc/tree-pretty-print.c -@@ -1539,6 +1539,7 @@ - case RROTATE_EXPR: - case VEC_LSHIFT_EXPR: - case VEC_RSHIFT_EXPR: -+ case WIDEN_LSHIFT_EXPR: - case BIT_IOR_EXPR: - case BIT_XOR_EXPR: - case BIT_AND_EXPR: -@@ -2209,6 +2210,22 @@ - pp_string (buffer, " > "); - break; +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c +@@ -16,5 +16,5 @@ + vst4_lane_u16 (arg0_uint16_t, arg1_uint16x4x4_t, 1); + } -+ case VEC_WIDEN_LSHIFT_HI_EXPR: -+ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); -+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); -+ pp_string (buffer, ", "); -+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); -+ pp_string (buffer, " > "); -+ break; -+ -+ case VEC_WIDEN_LSHIFT_LO_EXPR: -+ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); -+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); -+ pp_string (buffer, ", "); -+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); -+ pp_string (buffer, " > "); -+ break; -+ - case VEC_UNPACK_HI_EXPR: - pp_string (buffer, " VEC_UNPACK_HI_EXPR < "); - 
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); -@@ -2531,6 +2548,9 @@ - case RSHIFT_EXPR: - case LROTATE_EXPR: - case RROTATE_EXPR: -+ case VEC_WIDEN_LSHIFT_HI_EXPR: -+ case VEC_WIDEN_LSHIFT_LO_EXPR: -+ case WIDEN_LSHIFT_EXPR: - return 11; +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c +@@ -16,5 +16,5 @@ + vst4_lane_u32 (arg0_uint32_t, arg1_uint32x2x4_t, 1); + } - case WIDEN_SUM_EXPR: -@@ -2706,6 +2726,9 @@ - case VEC_RSHIFT_EXPR: - return "v>>"; +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c ++++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c +@@ -16,5 +16,5 @@ + vst4_lane_u8 (arg0_uint8_t, arg1_uint8x8x4_t, 1); + } -+ case WIDEN_LSHIFT_EXPR: -+ return "w<<"; -+ - case POINTER_PLUS_EXPR: - return "+"; +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4p16.c +@@ -16,5 +16,5 @@ + vst4_p16 (arg0_poly16_t, arg1_poly16x4x4_t); + } ---- a/src/gcc/tree-ssa-ccp.c -+++ b/src/gcc/tree-ssa-ccp.c -@@ -522,10 +522,6 @@ - val = bit_value_binop (PLUS_EXPR, TREE_TYPE (expr), - TREE_OPERAND (base, 0), TREE_OPERAND (base, 1)); - else if (base -- /* ??? While function decls have DECL_ALIGN their addresses -- may encode extra information in the lower bits on some -- targets (PR47239). Simply punt for function decls for now. 
*/ -- && TREE_CODE (base) != FUNCTION_DECL - && ((align = get_object_alignment (base, BIGGEST_ALIGNMENT)) - > BITS_PER_UNIT)) - { -@@ -1279,7 +1275,10 @@ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4p8.c +@@ -16,5 +16,5 @@ + vst4_p8 (arg0_poly8_t, arg1_poly8x8x4_t); + } - case GIMPLE_CALL: - { -- tree fn = valueize_op (gimple_call_fn (stmt)); -+ tree fn = gimple_call_fn (stmt); -+ if (!fn) -+ return NULL_TREE; -+ fn = valueize_op (fn); - if (TREE_CODE (fn) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL - && DECL_BUILT_IN (TREE_OPERAND (fn, 0))) -@@ -2317,6 +2316,11 @@ - return true; - } +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c +@@ -16,6 +16,6 @@ + vst4q_f32 (arg0_float32_t, arg1_float32x4x4_t); + } -+ /* Internal calls provide no argument types, so the extra laxity -+ for normal calls does not apply. 
*/ -+ if (gimple_call_internal_p (stmt)) -+ return false; -+ - /* Propagate into the call arguments. Compared to replace_uses_in - this can use the argument slot types for type verification - instead of the current argument type. We also can safely ---- a/src/gcc/tree-ssa-dom.c -+++ b/src/gcc/tree-ssa-dom.c -@@ -64,7 +64,7 @@ - struct { enum tree_code op; tree opnd; } unary; - struct { enum tree_code op; tree opnd0, opnd1; } binary; - struct { enum tree_code op; tree opnd0, opnd1, opnd2; } ternary; -- struct { tree fn; bool pure; size_t nargs; tree *args; } call; -+ struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call; - } ops; - }; +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c +@@ -16,5 +16,5 @@ + vst4q_lane_f32 (arg0_float32_t, arg1_float32x4x4_t, 1); + } -@@ -257,7 +257,7 @@ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], 
\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c +@@ -16,5 +16,5 @@ + vst4q_lane_p16 (arg0_poly16_t, arg1_poly16x8x4_t, 1); + } - expr->type = TREE_TYPE (gimple_call_lhs (stmt)); - expr->kind = EXPR_CALL; -- expr->ops.call.fn = gimple_call_fn (stmt); -+ expr->ops.call.fn_from = stmt; +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c +@@ -16,5 +16,5 @@ + vst4q_lane_s16 (arg0_int16_t, arg1_int16x8x4_t, 1); + } - if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)) - expr->ops.call.pure = true; -@@ -421,8 +421,8 @@ +-/* { dg-final { scan-assembler "vst4\.16\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c +@@ -16,5 +16,5 @@ + vst4q_lane_s32 (arg0_int32_t, arg1_int32x4x4_t, 1); + } - /* If the calls are to different functions, then they - clearly cannot be equal. */ -- if (! operand_equal_p (expr0->ops.call.fn, -- expr1->ops.call.fn, 0)) -+ if (!gimple_call_same_target_p (expr0->ops.call.fn_from, -+ expr1->ops.call.fn_from)) - return false; +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c +@@ -16,5 +16,5 @@ + vst4q_lane_u16 (arg0_uint16_t, arg1_uint16x8x4_t, 1); + } - if 
(! expr0->ops.call.pure) -@@ -502,9 +502,15 @@ - { - size_t i; - enum tree_code code = CALL_EXPR; -+ gimple fn_from; +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c +@@ -16,5 +16,5 @@ + vst4q_lane_u32 (arg0_uint32_t, arg1_uint32x4x4_t, 1); + } - val = iterative_hash_object (code, val); -- val = iterative_hash_expr (expr->ops.call.fn, val); -+ fn_from = expr->ops.call.fn_from; -+ if (gimple_call_internal_p (fn_from)) -+ val = iterative_hash_hashval_t -+ ((hashval_t) gimple_call_internal_fn (fn_from), val); -+ else -+ val = iterative_hash_expr (gimple_call_fn (fn_from), val); - for (i = 0; i < expr->ops.call.nargs; i++) - val = iterative_hash_expr (expr->ops.call.args[i], val); - } -@@ -564,8 +570,14 @@ - { - size_t i; - size_t nargs = element->expr.ops.call.nargs; -+ gimple fn_from; +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ 
\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c +@@ -16,6 +16,6 @@ + vst4q_p16 (arg0_poly16_t, arg1_poly16x8x4_t); + } -- print_generic_expr (stream, element->expr.ops.call.fn, 0); -+ fn_from = element->expr.ops.call.fn_from; -+ if (gimple_call_internal_p (fn_from)) -+ fputs (internal_fn_name (gimple_call_internal_fn (fn_from)), -+ stream); -+ else -+ print_generic_expr (stream, gimple_call_fn (fn_from), 0); - fprintf (stream, " ("); - for (i = 0; i < nargs; i++) - { ---- a/src/gcc/tree-ssa-math-opts.c -+++ b/src/gcc/tree-ssa-math-opts.c -@@ -1266,39 +1266,67 @@ - } - }; +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c ++++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c +@@ -16,6 +16,6 @@ + vst4q_p8 (arg0_poly8_t, arg1_poly8x16x4_t); + } --/* Return true if RHS is a suitable operand for a widening multiplication. -+/* Build a gimple assignment to cast VAL to TARGET. Insert the statement -+ prior to GSI's current position, and return the fresh SSA name. */ -+ -+static tree -+build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, -+ tree target, tree val) -+{ -+ tree result = make_ssa_name (target, NULL); -+ gimple stmt = gimple_build_assign_with_ops (CONVERT_EXPR, result, val, NULL); -+ gimple_set_location (stmt, loc); -+ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); -+ return result; -+} -+ -+/* Return true if RHS is a suitable operand for a widening multiplication, -+ assuming a target type of TYPE. - There are two cases: +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c +@@ -16,6 +16,6 @@ + vst4q_s16 (arg0_int16_t, arg1_int16x8x4_t); + } -- - RHS makes some value twice as wide. 
Store that value in *NEW_RHS_OUT -- if so, and store its type in *TYPE_OUT. -+ - RHS makes some value at least twice as wide. Store that value -+ in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c +@@ -16,6 +16,6 @@ + vst4q_s32 (arg0_int32_t, arg1_int32x4x4_t); + } - - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, - but leave *TYPE_OUT untouched. 
*/ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c +@@ -16,6 +16,6 @@ + vst4q_s8 (arg0_int8_t, arg1_int8x16x4_t); + } - static bool --is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out) -+is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, -+ tree *new_rhs_out) - { - gimple stmt; -- tree type, type1, rhs1; -+ tree type1, rhs1; - enum tree_code rhs_code; +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, 
\\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c +@@ -16,6 +16,6 @@ + vst4q_u16 (arg0_uint16_t, arg1_uint16x8x4_t); + } - if (TREE_CODE (rhs) == SSA_NAME) - { -- type = TREE_TYPE (rhs); - stmt = SSA_NAME_DEF_STMT (rhs); -- if (!is_gimple_assign (stmt)) -- return false; -+ if (is_gimple_assign (stmt)) -+ { -+ rhs_code = gimple_assign_rhs_code (stmt); -+ if (TREE_CODE (type) == INTEGER_TYPE -+ ? !CONVERT_EXPR_CODE_P (rhs_code) -+ : rhs_code != FIXED_CONVERT_EXPR) -+ rhs1 = rhs; -+ else -+ { -+ rhs1 = gimple_assign_rhs1 (stmt); +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c ++++ 
b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c +@@ -16,6 +16,6 @@ + vst4q_u32 (arg0_uint32_t, arg1_uint32x4x4_t); + } -- rhs_code = gimple_assign_rhs_code (stmt); -- if (TREE_CODE (type) == INTEGER_TYPE -- ? !CONVERT_EXPR_CODE_P (rhs_code) -- : rhs_code != FIXED_CONVERT_EXPR) -- return false; -+ if (TREE_CODE (rhs1) == INTEGER_CST) -+ { -+ *new_rhs_out = rhs1; -+ *type_out = NULL; -+ return true; -+ } -+ } -+ } -+ else -+ rhs1 = rhs; +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c +@@ -16,6 +16,6 @@ + vst4q_u8 (arg0_uint8_t, arg1_uint8x16x4_t); + } -- rhs1 = gimple_assign_rhs1 (stmt); - type1 = TREE_TYPE (rhs1); -+ - if (TREE_CODE (type1) != TREE_CODE (type) -- || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type)) -+ || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) - return false; +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 
\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s16.c +@@ -16,5 +16,5 @@ + vst4_s16 (arg0_int16_t, arg1_int16x4x4_t); + } - *new_rhs_out = rhs1; -@@ -1316,28 +1344,29 @@ - return false; +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s32.c +@@ -16,5 +16,5 @@ + vst4_s32 (arg0_int32_t, arg1_int32x2x4_t); } --/* Return true if STMT performs a widening multiplication. If so, -- store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT -- respectively. Also fill *RHS1_OUT and *RHS2_OUT such that converting -- those operands to types *TYPE1_OUT and *TYPE2_OUT would give the -- operands of the multiplication. 
*/ -+/* Return true if STMT performs a widening multiplication, assuming the -+ output type is TYPE. If so, store the unwidened types of the operands -+ in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and -+ *RHS2_OUT such that converting those operands to types *TYPE1_OUT -+ and *TYPE2_OUT would give the operands of the multiplication. */ +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s64.c +@@ -16,5 +16,5 @@ + vst4_s64 (arg0_int64_t, arg1_int64x1x4_t); + } - static bool - is_widening_mult_p (gimple stmt, - tree *type1_out, tree *rhs1_out, - tree *type2_out, tree *rhs2_out) - { -- tree type; -+ tree type = TREE_TYPE (gimple_assign_lhs (stmt)); +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s8.c +@@ -16,5 +16,5 @@ + vst4_s8 (arg0_int8_t, arg1_int8x8x4_t); + } -- type = TREE_TYPE (gimple_assign_lhs (stmt)); - if (TREE_CODE (type) != INTEGER_TYPE - && TREE_CODE (type) != 
FIXED_POINT_TYPE) - return false; +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u16.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u16.c +@@ -16,5 +16,5 @@ + vst4_u16 (arg0_uint16_t, arg1_uint16x4x4_t); + } -- if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out)) -+ if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, -+ rhs1_out)) - return false; +-/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u32.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u32.c +@@ -16,5 +16,5 @@ + vst4_u32 (arg0_uint32_t, arg1_uint32x2x4_t); + } -- if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out)) -+ if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, -+ rhs2_out)) - return false; +-/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { 
scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u64.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u64.c +@@ -16,5 +16,5 @@ + vst4_u64 (arg0_uint64_t, arg1_uint64x1x4_t); + } - if (*type1_out == NULL) -@@ -1354,6 +1383,18 @@ - *type2_out = *type1_out; - } +-/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u8.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u8.c +@@ -16,5 +16,5 @@ + vst4_u8 (arg0_uint8_t, arg1_uint8x8x4_t); + } -+ /* Ensure that the larger of the two operands comes first. 
*/ -+ if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) +-/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ++/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ + /* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -funsafe-math-optimizations" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include ++float32x2_t f_sub_abs_to_vabd_32() ++{ ++ float32x2_t val1 = vdup_n_f32 (10); ++ float32x2_t val2 = vdup_n_f32 (30); ++ float32x2_t sres = vsub_f32(val1, val2); ++ float32x2_t res = vabs_f32 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.f32" } }*/ ++ ++#include ++int8x8_t sub_abs_to_vabd_8() ++{ ++ int8x8_t val1 = vdup_n_s8 (10); ++ int8x8_t val2 = vdup_n_s8 (30); ++ int8x8_t sres = vsub_s8(val1, val2); ++ int8x8_t res = vabs_s8 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s8" } }*/ ++ ++int16x4_t sub_abs_to_vabd_16() ++{ ++ int16x4_t val1 = vdup_n_s16 (10); ++ int16x4_t val2 = vdup_n_s16 (30); ++ int16x4_t sres = vsub_s16(val1, val2); ++ int16x4_t res = vabs_s16 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s16" } }*/ ++ ++int32x2_t sub_abs_to_vabd_32() ++{ ++ int32x2_t val1 = vdup_n_s32 (10); ++ int32x2_t val2 = vdup_n_s32 (30); ++ int32x2_t sres = vsub_s32(val1, val2); ++ int32x2_t res = vabs_s32 (sres); ++ ++ return res; ++} ++/* { dg-final { scan-assembler "vabd\.s32" } }*/ +--- 
a/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O1" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++ ++#define SETUP(A) x##A = vld3_u32 (ptr + A * 0x20) ++#define MODIFY(A) x##A = vld3_lane_u32 (ptr + A * 0x20 + 0x10, x##A, 1) ++#define STORE(A) vst3_u32 (ptr + A * 0x20, x##A) ++ ++#define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5) ++ ++void ++bar (uint32_t *ptr, int y) ++{ ++ uint32x2x3_t MANY (SETUP); ++ int *x = __builtin_alloca (y); ++ int z[0x1000]; ++ foo (x, z); ++ MANY (MODIFY); ++ foo (x, z); ++ MANY (STORE); ++} +--- a/src/gcc/testsuite/gcc.target/arm/neon-modes-3.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-3.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include ++ ++void f1 (volatile float32x4_t *dest, volatile float32x4x4_t *src, int n) ++{ ++ float32x4x4_t a5, a6, a7, a8, a9; ++ int i; ++ ++ a5 = *src; ++ a6 = *src; ++ a7 = *src; ++ a8 = *src; ++ a9 = *src; ++ while (n--) + { -+ tree tmp; -+ tmp = *type1_out; -+ *type1_out = *type2_out; -+ *type2_out = tmp; -+ tmp = *rhs1_out; -+ *rhs1_out = *rhs2_out; -+ *rhs2_out = tmp; ++ for (i = 0; i < 8; i++) ++ { ++ float32x4x4_t a0, a1, a2, a3, a4; ++ ++ a0 = *src; ++ a1 = *src; ++ a2 = *src; ++ a3 = *src; ++ a4 = *src; ++ *src = a0; ++ *dest = a0.val[0]; ++ *dest = a0.val[3]; ++ *src = a1; ++ *dest = a1.val[0]; ++ *dest = a1.val[3]; ++ *src = a2; ++ *dest = a2.val[0]; ++ *dest = a2.val[3]; ++ *src = a3; ++ *dest = a3.val[0]; ++ *dest = a3.val[3]; ++ *src = a4; ++ *dest = a4.val[0]; ++ *dest = a4.val[3]; ++ } ++ *src = a5; ++ *dest = a5.val[0]; ++ *dest = a5.val[3]; ++ *src = a6; ++ *dest = a6.val[0]; ++ *dest = a6.val[3]; ++ *src = a7; ++ *dest = a7.val[0]; ++ *dest = a7.val[3]; ++ 
*src = a8; ++ *dest = a8.val[0]; ++ *dest = a8.val[3]; ++ *src = a9; ++ *dest = a9.val[0]; ++ *dest = a9.val[3]; ++ } ++} +--- a/src/gcc/testsuite/gcc.target/arm/neon-vld3-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-vld3-1.c +@@ -0,0 +1,27 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_hw } */ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++ ++uint32_t buffer[12]; ++ ++void __attribute__((noinline)) ++foo (uint32_t *a) ++{ ++ uint32x4x3_t x; ++ ++ x = vld3q_u32 (a); ++ x.val[0] = vaddq_u32 (x.val[0], x.val[1]); ++ vst3q_u32 (a, x); ++} ++ ++int ++main (void) ++{ ++ buffer[0] = 1; ++ buffer[1] = 2; ++ foo (buffer); ++ return buffer[0] != 3; ++} +--- a/src/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. 
*/ ++void f1(int n, unsigned int x[], unsigned int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] >> 3; ++} +--- a/src/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++/* { dg-add-options arm_neon } */ ++ ++void bor (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b) ++{ ++ int i; ++ for (i = 0; i < 9; i++) ++ c[i] = b[i] | (~a[i]); ++} ++void bic (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b) ++{ ++ int i; ++ for (i = 0; i < 9; i++) ++ c[i] = b[i] & (~a[i]); ++} ++ ++/* { dg-final { scan-assembler "vorn\\t" } } */ ++/* { dg-final { scan-assembler "vbic\\t" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. */ ++void f1(int n, int x[], int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] << 3; ++} +--- a/src/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */ ++/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */ ++ ++/* Verify that VSHR immediate is used. 
*/ ++void f1(int n, int x[], int y[]) { ++ int i; ++ for (i = 0; i < n; ++i) ++ y[i] = x[i] >> 3; ++} +--- a/src/gcc/testsuite/gcc.target/arm/neon-vst3-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/neon-vst3-1.c +@@ -0,0 +1,25 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_hw } */ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++ ++uint32_t buffer[64]; ++ ++void __attribute__((noinline)) ++foo (uint32_t *a) ++{ ++ uint32x4x3_t x; ++ ++ x = vld3q_u32 (a); ++ a[35] = 1; ++ vst3q_lane_u32 (a + 32, x, 1); ++} ++ ++int ++main (void) ++{ ++ foo (buffer); ++ return buffer[35] != 1; ++} +--- a/src/gcc/testsuite/gcc.target/arm/no-wmla-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/no-wmla-1.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int a, short b, short c) ++{ ++ int bc = b * c; ++ return a + (short)bc; ++} ++ ++/* { dg-final { scan-assembler "mul" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/pr46329.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr46329.c +@@ -0,0 +1,9 @@ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_neon } */ ++ ++int __attribute__ ((vector_size (32))) x; ++void ++foo (void) ++{ ++ x <<= x; ++} +--- a/src/gcc/testsuite/gcc.target/arm/pr48183.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr48183.c +@@ -0,0 +1,25 @@ ++/* testsuite/gcc.target/arm/pr48183.c */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-O -g" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include ++ ++void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n) ++{ ++ unsigned i; ++ int16x4x2_t input; ++ int32x4x2_t mid; ++ int32x4x2_t output; ++ ++ for (i = 0; i < n/2; i += 8) { ++ input = vld2_s16(src + i); ++ mid.val[0] = vmovl_s16(input.val[0]); ++ mid.val[1] = vmovl_s16(input.val[1]); ++ output.val[0] = vshlq_n_s32(mid.val[0], 8); ++ output.val[1] = vshlq_n_s32(mid.val[1], 8); ++ 
vst2q_s32((int32_t *)dst + i, output); + } ++} +--- a/src/gcc/testsuite/gcc.target/arm/pr50099.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr50099.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ + - return true; - } - -@@ -1362,10 +1403,15 @@ - value is true iff we converted the statement. */ - - static bool --convert_mult_to_widen (gimple stmt) -+convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) - { -- tree lhs, rhs1, rhs2, type, type1, type2; -+ tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL; - enum insn_code handler; -+ enum machine_mode to_mode, from_mode, actual_mode; -+ optab op; -+ int actual_precision; -+ location_t loc = gimple_location (stmt); -+ bool from_unsigned1, from_unsigned2; - - lhs = gimple_assign_lhs (stmt); - type = TREE_TYPE (lhs); -@@ -1375,18 +1421,82 @@ - if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) - return false; - -- if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2)) -- handler = optab_handler (umul_widen_optab, TYPE_MODE (type)); -- else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2)) -- handler = optab_handler (smul_widen_optab, TYPE_MODE (type)); -+ to_mode = TYPE_MODE (type); -+ from_mode = TYPE_MODE (type1); -+ from_unsigned1 = TYPE_UNSIGNED (type1); -+ from_unsigned2 = TYPE_UNSIGNED (type2); ++long long foo (signed char * arg) ++{ ++ long long temp_1; + -+ if (from_unsigned1 && from_unsigned2) -+ op = umul_widen_optab; -+ else if (!from_unsigned1 && !from_unsigned2) -+ op = smul_widen_optab; - else -- handler = optab_handler (usmul_widen_optab, TYPE_MODE (type)); -+ op = usmul_widen_optab; ++ temp_1 = arg[256]; ++ return temp_1; ++} +--- a/src/gcc/testsuite/gcc.target/arm/pr50305.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr50305.c +@@ -0,0 +1,60 @@ ++/* { dg-do compile } */ ++/* { dg-skip-if "incompatible options" { arm*-*-* } { "-march=*" } { "-march=armv7-a" } } */ ++/* { dg-options "-O2 -fno-omit-frame-pointer -marm -march=armv7-a -mfpu=vfp3" } */ + -+ handler = 
find_widening_optab_handler_and_mode (op, to_mode, from_mode, -+ 0, &actual_mode); - - if (handler == CODE_FOR_nothing) -- return false; -+ { -+ if (op != smul_widen_optab) -+ { -+ /* We can use a signed multiply with unsigned types as long as -+ there is a wider mode to use, or it is the smaller of the two -+ types that is unsigned. Note that type1 >= type2, always. */ -+ if ((TYPE_UNSIGNED (type1) -+ && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) -+ || (TYPE_UNSIGNED (type2) -+ && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) -+ { -+ from_mode = GET_MODE_WIDER_MODE (from_mode); -+ if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) -+ return false; -+ } ++struct event { ++ unsigned long long id; ++ unsigned int flag; ++}; + -+ op = smul_widen_optab; -+ handler = find_widening_optab_handler_and_mode (op, to_mode, -+ from_mode, 0, -+ &actual_mode); ++void dummy(void) ++{ ++ /* This is here to ensure that the offset of perf_event_id below ++ relative to the LANCHOR symbol exceeds the allowed displacement. */ ++ static int __warned[300]; ++ __warned[0] = 1; ++} + -+ if (handler == CODE_FOR_nothing) -+ return false; - -- gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1)); -- gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2)); -+ from_unsigned1 = from_unsigned2 = false; -+ } -+ else -+ return false; -+ } ++extern void *kmem_cache_alloc_trace (void *cachep); ++extern void *cs_cachep; ++extern int nr_cpu_ids; + -+ /* Ensure that the inputs to the handler are in the correct precison -+ for the opcode. This will be the full mode size. 
*/ -+ actual_precision = GET_MODE_PRECISION (actual_mode); -+ if (actual_precision != TYPE_PRECISION (type1) -+ || from_unsigned1 != TYPE_UNSIGNED (type1)) -+ { -+ tmp = create_tmp_var (build_nonstandard_integer_type -+ (actual_precision, from_unsigned1), -+ NULL); -+ rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1); -+ } -+ if (actual_precision != TYPE_PRECISION (type2) -+ || from_unsigned2 != TYPE_UNSIGNED (type2)) -+ { -+ /* Reuse the same type info, if possible. */ -+ if (!tmp || from_unsigned1 != from_unsigned2) -+ tmp = create_tmp_var (build_nonstandard_integer_type -+ (actual_precision, from_unsigned2), -+ NULL); -+ rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2); -+ } ++struct event * ++event_alloc (int cpu) ++{ ++ static unsigned long long __attribute__((aligned(8))) perf_event_id; ++ struct event *event; ++ unsigned long long result; ++ unsigned long tmp; + -+ /* Handle constants. */ -+ if (TREE_CODE (rhs1) == INTEGER_CST) -+ rhs1 = fold_convert (type1, rhs1); -+ if (TREE_CODE (rhs2) == INTEGER_CST) -+ rhs2 = fold_convert (type2, rhs2); ++ if (cpu >= nr_cpu_ids) ++ return 0; + -+ gimple_assign_set_rhs1 (stmt, rhs1); -+ gimple_assign_set_rhs2 (stmt, rhs2); - gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); - update_stmt (stmt); - return true; -@@ -1403,11 +1513,17 @@ - enum tree_code code) - { - gimple rhs1_stmt = NULL, rhs2_stmt = NULL; -- tree type, type1, type2; -+ gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt; -+ tree type, type1, type2, optype, tmp = NULL; - tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; - enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; - optab this_optab; - enum tree_code wmult_code; -+ enum insn_code handler; -+ enum machine_mode to_mode, from_mode, actual_mode; -+ location_t loc = gimple_location (stmt); -+ int actual_precision; -+ bool from_unsigned1, from_unsigned2; - - lhs = gimple_assign_lhs (stmt); - type = TREE_TYPE (lhs); -@@ -1429,8 +1545,6 @@ - if (is_gimple_assign (rhs1_stmt)) - 
rhs1_code = gimple_assign_rhs_code (rhs1_stmt); - } -- else -- return false; - - if (TREE_CODE (rhs2) == SSA_NAME) - { -@@ -1438,57 +1552,160 @@ - if (is_gimple_assign (rhs2_stmt)) - rhs2_code = gimple_assign_rhs_code (rhs2_stmt); - } -- else -- return false; - -- if (code == PLUS_EXPR && rhs1_code == MULT_EXPR) -+ /* Allow for one conversion statement between the multiply -+ and addition/subtraction statement. If there are more than -+ one conversions then we assume they would invalidate this -+ transformation. If that's not the case then they should have -+ been folded before now. */ -+ if (CONVERT_EXPR_CODE_P (rhs1_code)) ++ event = kmem_cache_alloc_trace (cs_cachep); ++ ++ __asm__ __volatile__ ("dmb" : : : "memory"); ++ ++ __asm__ __volatile__("@ atomic64_add_return\n" ++"1: ldrexd %0, %H0, [%3]\n" ++" adds %0, %0, %4\n" ++" adc %H0, %H0, %H4\n" ++" strexd %1, %0, %H0, [%3]\n" ++" teq %1, #0\n" ++" bne 1b" ++ : "=&r" (result), "=&r" (tmp), "+Qo" (perf_event_id) ++ : "r" (&perf_event_id), "r" (1LL) ++ : "cc"); ++ ++ __asm__ __volatile__ ("dmb" : : : "memory"); ++ ++ event->id = result; ++ ++ if (cpu) ++ event->flag = 1; ++ ++ for (cpu = 0; cpu < nr_cpu_ids; cpu++) ++ kmem_cache_alloc_trace (cs_cachep); ++ ++ return event; ++} ++ +--- a/src/gcc/testsuite/gcc.target/arm/pr50318-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr50318-1.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target arm_dsp } */ ++ ++long long test (unsigned int sec, unsigned long long nsecs) ++{ ++ return (long long)(long)sec * 1000000000L + (long long)(unsigned ++ long)nsecs; ++} ++ ++/* { dg-final { scan-assembler "umlal" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/pr52633.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr52633.c +@@ -0,0 +1,13 @@ ++/* PR tree-optimization/52633 */ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-march=armv7-a -mfloat-abi=softfp -mfpu=neon -O -ftree-vectorize" 
} */ ++ ++void ++test (unsigned short *x, signed char *y) ++{ ++ int i; ++ for (i = 0; i < 32; i++) ++ x[i] = (short) (y[i] << 5); ++} ++ +--- a/src/gcc/testsuite/gcc.target/arm/pr52686.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr52686.c +@@ -0,0 +1,19 @@ ++/* PR target/52375 */ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-march=armv7-a -mfloat-abi=softfp -mfpu=neon -O -ftree-vectorize" } */ ++ ++unsigned int output[4]; ++ ++void test (unsigned short *p) ++{ ++ unsigned int x = *p; ++ if (x) + { -+ conv1_stmt = rhs1_stmt; -+ rhs1 = gimple_assign_rhs1 (rhs1_stmt); -+ if (TREE_CODE (rhs1) == SSA_NAME) -+ { -+ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); -+ if (is_gimple_assign (rhs1_stmt)) -+ rhs1_code = gimple_assign_rhs_code (rhs1_stmt); -+ } -+ else -+ return false; ++ output[0] = x << 1; ++ output[1] = x << 1; ++ output[2] = x << 1; ++ output[3] = x << 1; + } -+ if (CONVERT_EXPR_CODE_P (rhs2_code)) ++} ++ +--- a/src/gcc/testsuite/gcc.target/arm/pr53636.c ++++ b/src/gcc/testsuite/gcc.target/arm/pr53636.c +@@ -0,0 +1,48 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_hw } */ ++/* { dg-options "-O -ftree-vectorize" } */ ++/* { dg-add-options arm_neon } */ ++ ++void fill (short *buf) __attribute__ ((noinline)); ++void fill (short *buf) ++{ ++ int i; ++ ++ for (i = 0; i < 11 * 8; i++) ++ buf[i] = i; ++} ++ ++void test (unsigned char *dst) __attribute__ ((noinline)); ++void test (unsigned char *dst) ++{ ++ short tmp[11 * 8], *tptr; ++ int i; ++ ++ fill (tmp); ++ ++ tptr = tmp; ++ for (i = 0; i < 8; i++) + { -+ conv2_stmt = rhs2_stmt; -+ rhs2 = gimple_assign_rhs1 (rhs2_stmt); -+ if (TREE_CODE (rhs2) == SSA_NAME) -+ { -+ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); -+ if (is_gimple_assign (rhs2_stmt)) -+ rhs2_code = gimple_assign_rhs_code (rhs2_stmt); -+ } -+ else -+ return false; ++ dst[0] = (-tptr[0] + 9 * tptr[0 + 1] + 9 * tptr[0 + 2] - tptr[0 + 3]) >> 7; ++ dst[1] = (-tptr[1] + 9 * tptr[1 + 1] + 9 * tptr[1 + 
2] - tptr[1 + 3]) >> 7; ++ dst[2] = (-tptr[2] + 9 * tptr[2 + 1] + 9 * tptr[2 + 2] - tptr[2 + 3]) >> 7; ++ dst[3] = (-tptr[3] + 9 * tptr[3 + 1] + 9 * tptr[3 + 2] - tptr[3 + 3]) >> 7; ++ dst[4] = (-tptr[4] + 9 * tptr[4 + 1] + 9 * tptr[4 + 2] - tptr[4 + 3]) >> 7; ++ dst[5] = (-tptr[5] + 9 * tptr[5 + 1] + 9 * tptr[5 + 2] - tptr[5 + 3]) >> 7; ++ dst[6] = (-tptr[6] + 9 * tptr[6 + 1] + 9 * tptr[6 + 2] - tptr[6 + 3]) >> 7; ++ dst[7] = (-tptr[7] + 9 * tptr[7 + 1] + 9 * tptr[7 + 2] - tptr[7 + 3]) >> 7; ++ ++ dst += 8; ++ tptr += 11; + } ++} + -+ /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call -+ is_widening_mult_p, but we still need the rhs returns. ++int main (void) ++{ ++ char buf [8 * 8]; + -+ It might also appear that it would be sufficient to use the existing -+ operands of the widening multiply, but that would limit the choice of -+ multiply-and-accumulate instructions. */ -+ if (code == PLUS_EXPR -+ && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) - { - if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, - &type2, &mult_rhs2)) - return false; - add_rhs = rhs2; -+ conv_stmt = conv1_stmt; - } -- else if (rhs2_code == MULT_EXPR) -+ else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) - { - if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, - &type2, &mult_rhs2)) - return false; - add_rhs = rhs1; -- } -- else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR) -- { -- mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt); -- mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt); -- type1 = TREE_TYPE (mult_rhs1); -- type2 = TREE_TYPE (mult_rhs2); -- add_rhs = rhs2; -- } -- else if (rhs2_code == WIDEN_MULT_EXPR) -- { -- mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt); -- mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt); -- type1 = TREE_TYPE (mult_rhs1); -- type2 = TREE_TYPE (mult_rhs2); -- add_rhs = rhs1; -+ conv_stmt = conv2_stmt; - } - else - return false; - -- if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) -- return false; -+ to_mode = 
TYPE_MODE (type); -+ from_mode = TYPE_MODE (type1); -+ from_unsigned1 = TYPE_UNSIGNED (type1); -+ from_unsigned2 = TYPE_UNSIGNED (type2); ++ test (buf); + -+ /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ -+ if (from_unsigned1 != from_unsigned2) -+ { -+ /* We can use a signed multiply with unsigned types as long as -+ there is a wider mode to use, or it is the smaller of the two -+ types that is unsigned. Note that type1 >= type2, always. */ -+ if ((from_unsigned1 -+ && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) -+ || (from_unsigned2 -+ && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) -+ { -+ from_mode = GET_MODE_WIDER_MODE (from_mode); -+ if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) -+ return false; -+ } ++ return 0; ++} ++ +--- a/src/gcc/testsuite/gcc.target/arm/sat-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/sat-1.c +@@ -0,0 +1,64 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_arm_ok } */ ++/* { dg-require-effective-target arm_arch_v6_ok } */ ++/* { dg-options "-O2 -marm" } */ ++/* { dg-add-options arm_arch_v6 } */ ++ ++ ++static inline int sat1 (int a, int amin, int amax) ++{ ++ if (a < amin) return amin; ++ else if (a > amax) return amax; ++ else return a; ++} ++ ++static inline int sat2 (int a, int amin, int amax) ++{ ++ if (a > amax) return amax; ++ else if (a < amin) return amin; ++ else return a; ++} ++ ++int u1 (int x) ++{ ++ return sat1 (x, 0, 63); ++} ++ ++int us1 (int x) ++{ ++ return sat1 (x >> 5, 0, 63); ++} ++ ++int s1 (int x) ++{ ++ return sat1 (x, -64, 63); ++} ++ ++int ss1 (int x) ++{ ++ return sat1 (x >> 5, -64, 63); ++} ++ ++int u2 (int x) ++{ ++ return sat2 (x, 0, 63); ++} ++ ++int us2 (int x) ++{ ++ return sat2 (x >> 5, 0, 63); ++} ++ ++int s2 (int x) ++{ ++ return sat2 (x, -64, 63); ++} ++ ++int ss2 (int x) ++{ ++ return sat2 (x >> 5, -64, 63); ++} ++ ++/* { dg-final { scan-assembler-times "usat" 4 } } */ ++/* { dg-final { scan-assembler-times "ssat" 4 
} } */ ++ +--- a/src/gcc/testsuite/gcc.target/arm/shiftable.c ++++ b/src/gcc/testsuite/gcc.target/arm/shiftable.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++/* { dg-require-effective-target arm32 } */ ++ ++/* ARM has shift-and-alu insns. Depending on the ALU op GCC represents some ++ of these as a left shift, others as a multiply. Check that we match the ++ right one. */ ++ ++int ++plus (int a, int b) ++{ ++ return (a * 64) + b; ++} ++ ++/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */ ++ ++int ++minus (int a, int b) ++{ ++ return a - (b * 64); ++} ++ ++/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */ ++ ++int ++ior (int a, int b) ++{ ++ return (a * 64) | b; ++} ++ ++/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */ ++ ++int ++xor (int a, int b) ++{ ++ return (a * 64) ^ b; ++} ++ ++/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */ ++ ++int ++and (int a, int b) ++{ ++ return (a * 64) & b; ++} ++ ++/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */ ++ ++int ++rsb (int a, int b) ++{ ++ return (a * 64) - b; ++} ++ ++/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */ ++ ++int ++mvn (int a, int b) ++{ ++ return ~(a * 64); ++} ++ ++/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/smlaltb-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/smlaltb-1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long int ++foo (long long x, int in) ++{ ++ short a = in & 0xffff; ++ short b = (in & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlaltb" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/smlaltt-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/smlaltt-1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long int ++foo (long long x, int in1, int in2) ++{ ++ short a = (in1 & 0xffff0000) >> 16; ++ short b = (in2 & 0xffff0000) >> 16; ++ ++ return 
x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlaltt" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/smlatb-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/smlatb-1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int x, int in) ++{ ++ short a = in & 0xffff; ++ short b = (in & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlatb" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/smlatt-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/smlatt-1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++int ++foo (int x, int in1, int in2) ++{ ++ short a = (in1 & 0xffff0000) >> 16; ++ short b = (in2 & 0xffff0000) >> 16; ++ ++ return x + b * a; ++} ++ ++/* { dg-final { scan-assembler "smlatt" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c +@@ -0,0 +1,13 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpne" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i == '+') || (j == '-') ) { ++ return 1; ++ } else { ++ return 0; ++ } ++} +--- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c +@@ -0,0 +1,13 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpeq" } } */ ++ ++int f(int i, int j) ++{ ++ if ( (i == '+') && (j == '-') ) { ++ return 1; ++ } else { ++ return 0; ++ } ++} +--- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c +@@ -0,0 +1,12 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpgt" } } */ + -+ from_unsigned1 = from_unsigned2 = false; -+ } ++int f(int i, int 
j) ++{ ++ if ( (i >= '+') ? (j > '-') : 0) ++ return 1; ++ else ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c +@@ -0,0 +1,12 @@ ++/* Use conditional compare */ ++/* { dg-options "-O2" } */ ++/* { dg-skip-if "" { arm_thumb1_ok } } */ ++/* { dg-final { scan-assembler "cmpgt" } } */ + -+ /* If there was a conversion between the multiply and addition -+ then we need to make sure it fits a multiply-and-accumulate. -+ The should be a single mode change which does not change the -+ value. */ -+ if (conv_stmt) -+ { -+ /* We use the original, unmodified data types for this. */ -+ tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); -+ tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); -+ int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); -+ bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); ++int f(int i, int j) ++{ ++ if ( (i >= '+') ? (j <= '-') : 1) ++ return 1; ++ else ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c +@@ -0,0 +1,27 @@ ++/* Ensure simple replicated constant immediates work. */ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ + -+ if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) -+ { -+ /* Conversion is a truncate. */ -+ if (TYPE_PRECISION (to_type) < data_size) -+ return false; -+ } -+ else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) -+ { -+ /* Conversion is an extend. Check it's the right sort. */ -+ if (TYPE_UNSIGNED (from_type) != is_unsigned -+ && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) -+ return false; -+ } -+ /* else convert is a no-op for our purposes. 
*/ -+ } - - /* Verify that the machine can perform a widening multiply - accumulate in this mode/signedness combination, otherwise - this transformation is likely to pessimize code. */ -- this_optab = optab_for_tree_code (wmult_code, type1, optab_default); -- if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing) -+ optype = build_nonstandard_integer_type (from_mode, from_unsigned1); -+ this_optab = optab_for_tree_code (wmult_code, optype, optab_default); -+ handler = find_widening_optab_handler_and_mode (this_optab, to_mode, -+ from_mode, 0, &actual_mode); ++int ++foo1 (int a) ++{ ++ return a + 0xfefefefe; ++} + -+ if (handler == CODE_FOR_nothing) - return false; - -- /* ??? May need some type verification here? */ -+ /* Ensure that the inputs to the handler are in the correct precison -+ for the opcode. This will be the full mode size. */ -+ actual_precision = GET_MODE_PRECISION (actual_mode); -+ if (actual_precision != TYPE_PRECISION (type1) -+ || from_unsigned1 != TYPE_UNSIGNED (type1)) -+ { -+ tmp = create_tmp_var (build_nonstandard_integer_type -+ (actual_precision, from_unsigned1), -+ NULL); -+ mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1); -+ } -+ if (actual_precision != TYPE_PRECISION (type2) -+ || from_unsigned2 != TYPE_UNSIGNED (type2)) -+ { -+ if (!tmp || from_unsigned1 != from_unsigned2) -+ tmp = create_tmp_var (build_nonstandard_integer_type -+ (actual_precision, from_unsigned2), -+ NULL); -+ mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2); -+ } ++/* { dg-final { scan-assembler "add.*#-16843010" } } */ + -+ if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) -+ add_rhs = build_and_insert_cast (gsi, loc, create_tmp_var (type, NULL), -+ add_rhs); ++int ++foo2 (int a) ++{ ++ return a - 0xab00ab00; ++} + -+ /* Handle constants. 
*/ -+ if (TREE_CODE (mult_rhs1) == INTEGER_CST) -+ mult_rhs1 = fold_convert (type1, mult_rhs1); -+ if (TREE_CODE (mult_rhs2) == INTEGER_CST) -+ mult_rhs2 = fold_convert (type2, mult_rhs2); - -- gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, -- fold_convert (type1, mult_rhs1), -- fold_convert (type2, mult_rhs2), -+ gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2, - add_rhs); - update_stmt (gsi_stmt (*gsi)); - return true; -@@ -1696,7 +1913,7 @@ - switch (code) - { - case MULT_EXPR: -- if (!convert_mult_to_widen (stmt) -+ if (!convert_mult_to_widen (stmt, &gsi) - && convert_mult_to_fma (stmt, - gimple_assign_rhs1 (stmt), - gimple_assign_rhs2 (stmt))) ---- a/src/gcc/tree-ssa-phiopt.c -+++ b/src/gcc/tree-ssa-phiopt.c -@@ -34,6 +34,8 @@ - #include "langhooks.h" - #include "pointer-set.h" - #include "domwalk.h" -+#include "cfgloop.h" -+#include "tree-data-ref.h" - - static unsigned int tree_ssa_phiopt (void); - static unsigned int tree_ssa_phiopt_worker (bool); -@@ -1292,35 +1294,18 @@ - return true; - } - --/* Do the main work of conditional store replacement. We already know -- that the recognized pattern looks like so: -- -- split: -- if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) -- THEN_BB: -- X = Y; -- goto JOIN_BB; -- ELSE_BB: -- X = Z; -- fallthrough (edge E0) -- JOIN_BB: -- some more -- -- We check that THEN_BB and ELSE_BB contain only one store -- that the stores have a "simple" RHS. */ -+/* Do the main work of conditional store replacement. 
*/ - - static bool --cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, -- basic_block join_bb) -+cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb, -+ basic_block join_bb, gimple then_assign, -+ gimple else_assign) - { -- gimple then_assign = last_and_only_stmt (then_bb); -- gimple else_assign = last_and_only_stmt (else_bb); - tree lhs_base, lhs, then_rhs, else_rhs; - source_location then_locus, else_locus; - gimple_stmt_iterator gsi; - gimple newphi, new_stmt; - -- /* Check if then_bb and else_bb contain only one store each. */ - if (then_assign == NULL - || !gimple_assign_single_p (then_assign) - || else_assign == NULL -@@ -1385,6 +1370,190 @@ - return true; - } - -+/* Conditional store replacement. We already know -+ that the recognized pattern looks like so: ++/* { dg-final { scan-assembler "sub.*#-1426019584" } } */ + -+ split: -+ if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) -+ THEN_BB: -+ ... -+ X = Y; -+ ... -+ goto JOIN_BB; -+ ELSE_BB: -+ ... -+ X = Z; -+ ... -+ fallthrough (edge E0) -+ JOIN_BB: -+ some more ++int ++foo3 (int a) ++{ ++ return a & 0x00cd00cd; ++} + -+ We check that it is safe to sink the store to JOIN_BB by verifying that -+ there are no read-after-write or write-after-write dependencies in -+ THEN_BB and ELSE_BB. */ ++/* { dg-final { scan-assembler "and.*#13435085" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c +@@ -0,0 +1,75 @@ ++/* Ensure split constants can use replicated patterns. 
*/ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ + -+static bool -+cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, -+ basic_block join_bb) ++int ++foo1 (int a) +{ -+ gimple then_assign = last_and_only_stmt (then_bb); -+ gimple else_assign = last_and_only_stmt (else_bb); -+ VEC (data_reference_p, heap) *then_datarefs, *else_datarefs; -+ VEC (ddr_p, heap) *then_ddrs, *else_ddrs; -+ gimple then_store, else_store; -+ bool found, ok = false, res; -+ struct data_dependence_relation *ddr; -+ data_reference_p then_dr, else_dr; -+ int i, j; -+ tree then_lhs, else_lhs; -+ VEC (gimple, heap) *then_stores, *else_stores; -+ basic_block blocks[3]; ++ return a + 0xfe00fe01; ++} + -+ if (MAX_STORES_TO_SINK == 0) -+ return false; ++/* { dg-final { scan-assembler "add.*#-33489408" } } */ ++/* { dg-final { scan-assembler "add.*#1" } } */ + -+ /* Handle the case with single statement in THEN_BB and ELSE_BB. */ -+ if (then_assign && else_assign) -+ return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, -+ then_assign, else_assign); ++int ++foo2 (int a) ++{ ++ return a + 0xdd01dd00; ++} + -+ /* Find data references. */ -+ then_datarefs = VEC_alloc (data_reference_p, heap, 1); -+ else_datarefs = VEC_alloc (data_reference_p, heap, 1); -+ if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs) -+ == chrec_dont_know) -+ || !VEC_length (data_reference_p, then_datarefs) -+ || (find_data_references_in_bb (NULL, else_bb, &else_datarefs) -+ == chrec_dont_know) -+ || !VEC_length (data_reference_p, else_datarefs)) -+ { -+ free_data_refs (then_datarefs); -+ free_data_refs (else_datarefs); -+ return false; -+ } ++/* { dg-final { scan-assembler "add.*#-587145984" } } */ ++/* { dg-final { scan-assembler "add.*#65536" } } */ + -+ /* Find pairs of stores with equal LHS. 
*/ -+ then_stores = VEC_alloc (gimple, heap, 1); -+ else_stores = VEC_alloc (gimple, heap, 1); -+ FOR_EACH_VEC_ELT (data_reference_p, then_datarefs, i, then_dr) -+ { -+ if (DR_IS_READ (then_dr)) -+ continue; ++int ++foo3 (int a) ++{ ++ return a + 0x00443344; ++} + -+ then_store = DR_STMT (then_dr); -+ then_lhs = gimple_assign_lhs (then_store); -+ found = false; ++/* { dg-final { scan-assembler "add.*#4456516" } } */ ++/* { dg-final { scan-assembler "add.*#13056" } } */ + -+ FOR_EACH_VEC_ELT (data_reference_p, else_datarefs, j, else_dr) -+ { -+ if (DR_IS_READ (else_dr)) -+ continue; ++int ++foo4 (int a) ++{ ++ return a + 0x77330033; ++} + -+ else_store = DR_STMT (else_dr); -+ else_lhs = gimple_assign_lhs (else_store); ++/* { dg-final { scan-assembler "add.*#1996488704" } } */ ++/* { dg-final { scan-assembler "add.*#3342387" } } */ + -+ if (operand_equal_p (then_lhs, else_lhs, 0)) -+ { -+ found = true; -+ break; -+ } -+ } ++int ++foo5 (int a) ++{ ++ return a + 0x11221122; ++} + -+ if (!found) -+ continue; ++/* { dg-final { scan-assembler "add.*#285217024" } } */ ++/* { dg-final { scan-assembler "add.*#2228258" } } */ + -+ VEC_safe_push (gimple, heap, then_stores, then_store); -+ VEC_safe_push (gimple, heap, else_stores, else_store); -+ } ++int ++foo6 (int a) ++{ ++ return a + 0x66666677; ++} + -+ /* No pairs of stores found. */ -+ if (!VEC_length (gimple, then_stores) -+ || VEC_length (gimple, then_stores) > (unsigned) MAX_STORES_TO_SINK) -+ { -+ free_data_refs (then_datarefs); -+ free_data_refs (else_datarefs); -+ VEC_free (gimple, heap, then_stores); -+ VEC_free (gimple, heap, else_stores); -+ return false; -+ } ++/* { dg-final { scan-assembler "add.*#1717986918" } } */ ++/* { dg-final { scan-assembler "add.*#17" } } */ + -+ /* Compute and check data dependencies in both basic blocks. 
*/ -+ then_ddrs = VEC_alloc (ddr_p, heap, 1); -+ else_ddrs = VEC_alloc (ddr_p, heap, 1); -+ compute_all_dependences (then_datarefs, &then_ddrs, NULL, false); -+ compute_all_dependences (else_datarefs, &else_ddrs, NULL, false); -+ blocks[0] = then_bb; -+ blocks[1] = else_bb; -+ blocks[2] = join_bb; -+ renumber_gimple_stmt_uids_in_blocks (blocks, 3); ++int ++foo7 (int a) ++{ ++ return a + 0x99888888; ++} + -+ /* Check that there are no read-after-write or write-after-write dependencies -+ in THEN_BB. */ -+ FOR_EACH_VEC_ELT (ddr_p, then_ddrs, i, ddr) -+ { -+ struct data_reference *dra = DDR_A (ddr); -+ struct data_reference *drb = DDR_B (ddr); ++/* { dg-final { scan-assembler "add.*#-2004318072" } } */ ++/* { dg-final { scan-assembler "add.*#285212672" } } */ + -+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known -+ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) -+ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) -+ || (DR_IS_READ (drb) && DR_IS_WRITE (dra) -+ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) -+ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) -+ { -+ free_dependence_relations (then_ddrs); -+ free_dependence_relations (else_ddrs); -+ free_data_refs (then_datarefs); -+ free_data_refs (else_datarefs); -+ VEC_free (gimple, heap, then_stores); -+ VEC_free (gimple, heap, else_stores); -+ return false; -+ } -+ } ++int ++foo8 (int a) ++{ ++ return a + 0xdddddfff; ++} + -+ /* Check that there are no read-after-write or write-after-write dependencies -+ in ELSE_BB. */ -+ FOR_EACH_VEC_ELT (ddr_p, else_ddrs, i, ddr) -+ { -+ struct data_reference *dra = DDR_A (ddr); -+ struct data_reference *drb = DDR_B (ddr); ++/* { dg-final { scan-assembler "add.*#-572662307" } } */ ++/* { dg-final { scan-assembler "addw.*#546" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c +@@ -0,0 +1,28 @@ ++/* Ensure negated/inverted replicated constant immediates work. 
*/ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ + -+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known -+ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) -+ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) -+ || (DR_IS_READ (drb) && DR_IS_WRITE (dra) -+ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) -+ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) -+ { -+ free_dependence_relations (then_ddrs); -+ free_dependence_relations (else_ddrs); -+ free_data_refs (then_datarefs); -+ free_data_refs (else_datarefs); -+ VEC_free (gimple, heap, then_stores); -+ VEC_free (gimple, heap, else_stores); -+ return false; -+ } -+ } ++int ++foo1 (int a) ++{ ++ return a | 0xffffff00; ++} + -+ /* Sink stores with same LHS. */ -+ FOR_EACH_VEC_ELT (gimple, then_stores, i, then_store) -+ { -+ else_store = VEC_index (gimple, else_stores, i); -+ res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, -+ then_store, else_store); -+ ok = ok || res; -+ } ++/* { dg-final { scan-assembler "orn.*#255" } } */ + -+ free_dependence_relations (then_ddrs); -+ free_dependence_relations (else_ddrs); -+ free_data_refs (then_datarefs); -+ free_data_refs (else_datarefs); -+ VEC_free (gimple, heap, then_stores); -+ VEC_free (gimple, heap, else_stores); ++int ++foo2 (int a) ++{ ++ return a & 0xffeeffee; ++} + -+ return ok; ++/* { dg-final { scan-assembler "bic.*#1114129" } } */ ++ ++int ++foo3 (int a) ++{ ++ return a & 0xaaaaaa00; +} + - /* Always do these optimizations if we have SSA - trees to work on. */ - static bool ---- a/src/gcc/tree-ssa-pre.c -+++ b/src/gcc/tree-ssa-pre.c -@@ -2657,11 +2657,13 @@ - } - - /* Return true if we can value number the call in STMT. This is true -- if we have a pure or constant call. */ -+ if we have a pure or constant call to a real function. 
*/ - - static bool - can_value_number_call (gimple stmt) - { -+ if (gimple_call_internal_p (stmt)) -+ return false; - if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) - return true; - return false; -@@ -4190,6 +4192,7 @@ - gimple_stmt_iterator gsi; - gimple stmt; - unsigned i; -+ tree fn; - - FOR_EACH_BB (b) - { -@@ -4381,9 +4384,10 @@ - /* Visit indirect calls and turn them into direct calls if - possible. */ - if (is_gimple_call (stmt) -- && TREE_CODE (gimple_call_fn (stmt)) == SSA_NAME) -+ && (fn = gimple_call_fn (stmt)) -+ && TREE_CODE (fn) == SSA_NAME) - { -- tree fn = VN_INFO (gimple_call_fn (stmt))->valnum; -+ fn = VN_INFO (fn)->valnum; - if (TREE_CODE (fn) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL) - { ---- a/src/gcc/tree-ssa-sccvn.c -+++ b/src/gcc/tree-ssa-sccvn.c -@@ -2982,7 +2982,8 @@ - /* ??? We should handle stores from calls. */ - else if (TREE_CODE (lhs) == SSA_NAME) - { -- if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) -+ if (!gimple_call_internal_p (stmt) -+ && gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) - changed = visit_reference_op_call (lhs, stmt); - else - changed = defs_to_varying (stmt); ---- a/src/gcc/tree-ssa-structalias.c -+++ b/src/gcc/tree-ssa-structalias.c -@@ -4328,6 +4328,7 @@ - /* Fallthru to general call handling. */; - } - if (!in_ipa_mode -+ || gimple_call_internal_p (t) - || (fndecl - && (!(fi = lookup_vi_for_tree (fndecl)) - || !fi->is_fn_info))) ---- a/src/gcc/tree-vect-data-refs.c -+++ b/src/gcc/tree-vect-data-refs.c -@@ -43,6 +43,45 @@ - #include "expr.h" - #include "optabs.h" - -+/* Return true if load- or store-lanes optab OPTAB is implemented for -+ COUNT vectors of type VECTYPE. NAME is the name of OPTAB. 
*/ ++/* { dg-final { scan-assembler "and.*#-1431655766" } } */ ++/* { dg-final { scan-assembler "bic.*#170" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c ++++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c +@@ -0,0 +1,22 @@ ++/* Ensure replicated constants don't make things worse. */ ++/* { dg-options "-mthumb -O2" } */ ++/* { dg-require-effective-target arm_thumb2_ok } */ + -+static bool -+vect_lanes_optab_supported_p (const char *name, convert_optab optab, -+ tree vectype, unsigned HOST_WIDE_INT count) ++int ++foo1 (int a) +{ -+ enum machine_mode mode, array_mode; -+ bool limit_p; ++ /* It might be tempting to use 0x01000100, but it wouldn't help. */ ++ return a + 0x01f001e0; ++} + -+ mode = TYPE_MODE (vectype); -+ limit_p = !targetm.array_mode_supported_p (mode, count); -+ array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode), -+ MODE_INT, limit_p); ++/* { dg-final { scan-assembler "add.*#32505856" } } */ ++/* { dg-final { scan-assembler "add.*#480" } } */ + -+ if (array_mode == BLKmode) -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]", -+ GET_MODE_NAME (mode), count); -+ return false; -+ } ++int ++foo2 (int a) ++{ ++ return a + 0x0f100e10; ++} + -+ if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing) -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "cannot use %s<%s><%s>", -+ name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); -+ return false; -+ } ++/* { dg-final { scan-assembler "add.*#252706816" } } */ ++/* { dg-final { scan-assembler "add.*#3600" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ + -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "can use 
%s<%s><%s>", -+ name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); ++#include + -+ return true; ++void unknown_alignment (char *dest, char *src) ++{ ++ memcpy (dest, src, 15); +} + ++/* We should see three unaligned word loads and store pairs, one unaligned ++ ldrh/strh pair, and an ldrb/strb pair. Sanity check that. */ + - /* Return the smallest scalar part of STMT. - This is used to determine the vectype of the stmt. We generally set the - vectype according to the type of the result (lhs). For stmts whose -@@ -289,39 +328,6 @@ - } - } - -- --/* Function vect_equal_offsets. -- -- Check if OFFSET1 and OFFSET2 are identical expressions. */ -- --static bool --vect_equal_offsets (tree offset1, tree offset2) --{ -- bool res; -- -- STRIP_NOPS (offset1); -- STRIP_NOPS (offset2); -- -- if (offset1 == offset2) -- return true; -- -- if (TREE_CODE (offset1) != TREE_CODE (offset2) -- || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) -- return false; -- -- res = vect_equal_offsets (TREE_OPERAND (offset1, 0), -- TREE_OPERAND (offset2, 0)); -- -- if (!res || !BINARY_CLASS_P (offset1)) -- return res; -- -- res = vect_equal_offsets (TREE_OPERAND (offset1, 1), -- TREE_OPERAND (offset2, 1)); -- -- return res; --} -- -- - /* Check dependence between DRA and DRB for basic block vectorization. - If the accesses share same bases and offsets, we can compare their initial - constant offsets to decide whether they differ or not. In case of a read- -@@ -347,12 +353,8 @@ - - /* Check that the data-refs have same bases and offsets. If not, we can't - determine if they are dependent. 
*/ -- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) -- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR -- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR -- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) -- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) -- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))) -+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) -+ || !dr_equal_offsets_p (dra, drb)) - return true; - - /* Check the types. */ -@@ -397,12 +399,8 @@ - - /* Check that the data-refs have same first location (except init) and they - are both either store or load (not load and store). */ -- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) -- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR -- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR -- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) -- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) -- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)) -+ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) -+ || !dr_equal_offsets_p (dra, drb) - || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) - || DR_IS_READ (dra) != DR_IS_READ (drb)) - return false; -@@ -609,6 +607,11 @@ - if (vect_check_interleaving (dra, drb)) - return false; - -+ /* Read-read is OK (we need this check here, after checking for -+ interleaving). 
*/ -+ if (DR_IS_READ (dra) && DR_IS_READ (drb)) -+ return false; ++/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ +--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c ++++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ + - if (vect_print_dump_info (REPORT_DR_DETAILS)) - { - fprintf (vect_dump, "can't determine dependence between "); -@@ -1250,7 +1253,9 @@ - vect_peel_info elem = (vect_peel_info) *slot; - vect_peel_extended_info max = (vect_peel_extended_info) data; - -- if (elem->count > max->peel_info.count) -+ if (elem->count > max->peel_info.count -+ || (elem->count == max->peel_info.count -+ && max->peel_info.npeel > elem->npeel)) - { - max->peel_info.npeel = elem->npeel; - max->peel_info.count = elem->count; -@@ -2256,19 +2261,6 @@ - return false; - } - -- /* FORNOW: we handle only interleaving that is a power of 2. -- We don't fail here if it may be still possible to vectorize the -- group using SLP. If not, the size of the group will be checked in -- vect_analyze_operations, and the vectorization will fail. */ -- if (exact_log2 (stride) == -1) -- { -- if (vect_print_dump_info (REPORT_DETAILS)) -- fprintf (vect_dump, "interleaving is not a power of 2"); -- -- if (slp_impossible) -- return false; -- } -- - if (stride == 0) - stride = count; - -@@ -2983,31 +2975,33 @@ - - /* Function vect_create_data_ref_ptr. - -- Create a new pointer to vector type (vp), that points to the first location -- accessed in the loop by STMT, along with the def-use update chain to -- appropriately advance the pointer through the loop iterations. Also set -- aliasing information for the pointer. 
This vector pointer is used by the -- callers to this function to create a memory reference expression for vector -- load/store access. -+ Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first -+ location accessed in the loop by STMT, along with the def-use update -+ chain to appropriately advance the pointer through the loop iterations. -+ Also set aliasing information for the pointer. This pointer is used by -+ the callers to this function to create a memory reference expression for -+ vector load/store access. - - Input: - 1. STMT: a stmt that references memory. Expected to be of the form - GIMPLE_ASSIGN or - GIMPLE_ASSIGN . -- 2. AT_LOOP: the loop where the vector memref is to be created. -- 3. OFFSET (optional): an offset to be added to the initial address accessed -+ 2. AGGR_TYPE: the type of the reference, which should be either a vector -+ or an array. -+ 3. AT_LOOP: the loop where the vector memref is to be created. -+ 4. OFFSET (optional): an offset to be added to the initial address accessed - by the data-ref in STMT. -- 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain -+ 5. ONLY_INIT: indicate if vp is to be updated in the loop, or remain - pointing to the initial address. -- 5. TYPE: if not NULL indicates the required type of the data-ref. -+ 6. TYPE: if not NULL indicates the required type of the data-ref. - - Output: - 1. Declare a new ptr to vector_type, and have it point to the base of the - data reference (initial addressed accessed by the data reference). - For example, for vector of type V8HI, the following code is generated: - -- v8hi *vp; -- vp = (v8hi *)initial_address; -+ v8hi *ap; -+ ap = (v8hi *)initial_address; - - if OFFSET is not supplied: - initial_address = &a[init]; -@@ -3027,7 +3021,7 @@ - 4. Return the pointer. 
*/ - - tree --vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, -+vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop, - tree offset, tree *initial_address, gimple *ptr_incr, - bool only_init, bool *inv_p) - { -@@ -3037,17 +3031,16 @@ - struct loop *loop = NULL; - bool nested_in_vect_loop = false; - struct loop *containing_loop = NULL; -- tree vectype = STMT_VINFO_VECTYPE (stmt_info); -- tree vect_ptr_type; -- tree vect_ptr; -+ tree aggr_ptr_type; -+ tree aggr_ptr; - tree new_temp; - gimple vec_stmt; - gimple_seq new_stmt_list = NULL; - edge pe = NULL; - basic_block new_bb; -- tree vect_ptr_init; -+ tree aggr_ptr_init; - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); -- tree vptr; -+ tree aptr; - gimple_stmt_iterator incr_gsi; - bool insert_after; - bool negative; -@@ -3058,6 +3051,9 @@ - gimple_stmt_iterator gsi = gsi_for_stmt (stmt); - tree base; - -+ gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE -+ || TREE_CODE (aggr_type) == VECTOR_TYPE); ++#include + - if (loop_vinfo) - { - loop = LOOP_VINFO_LOOP (loop_vinfo); -@@ -3092,8 +3088,9 @@ - if (vect_print_dump_info (REPORT_DETAILS)) - { - tree data_ref_base = base_name; -- fprintf (vect_dump, "create vector-pointer variable to type: "); -- print_generic_expr (vect_dump, vectype, TDF_SLIM); -+ fprintf (vect_dump, "create %s-pointer variable to type: ", -+ tree_code_name[(int) TREE_CODE (aggr_type)]); -+ print_generic_expr (vect_dump, aggr_type, TDF_SLIM); - if (TREE_CODE (data_ref_base) == VAR_DECL - || TREE_CODE (data_ref_base) == ARRAY_REF) - fprintf (vect_dump, " vectorizing an array ref: "); -@@ -3104,27 +3101,28 @@ - print_generic_expr (vect_dump, base_name, TDF_SLIM); - } - -- /* (1) Create the new vector-pointer variable. */ -- vect_ptr_type = build_pointer_type (vectype); -+ /* (1) Create the new aggregate-pointer variable. 
*/ -+ aggr_ptr_type = build_pointer_type (aggr_type); - base = get_base_address (DR_REF (dr)); - if (base - && TREE_CODE (base) == MEM_REF) -- vect_ptr_type -- = build_qualified_type (vect_ptr_type, -+ aggr_ptr_type -+ = build_qualified_type (aggr_ptr_type, - TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0)))); -- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, -+ aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, - get_name (base_name)); - -- /* Vector types inherit the alias set of their component type by default so -- we need to use a ref-all pointer if the data reference does not conflict -- with the created vector data reference because it is not addressable. */ -- if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), -+ /* Vector and array types inherit the alias set of their component -+ type by default so we need to use a ref-all pointer if the data -+ reference does not conflict with the created aggregated data -+ reference because it is not addressable. 
*/ -+ if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), - get_alias_set (DR_REF (dr)))) - { -- vect_ptr_type -- = build_pointer_type_for_mode (vectype, -- TYPE_MODE (vect_ptr_type), true); -- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, -+ aggr_ptr_type -+ = build_pointer_type_for_mode (aggr_type, -+ TYPE_MODE (aggr_ptr_type), true); -+ aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, - get_name (base_name)); - } - -@@ -3135,14 +3133,14 @@ - do - { - tree lhs = gimple_assign_lhs (orig_stmt); -- if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), -+ if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), - get_alias_set (lhs))) - { -- vect_ptr_type -- = build_pointer_type_for_mode (vectype, -- TYPE_MODE (vect_ptr_type), true); -- vect_ptr -- = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, -+ aggr_ptr_type -+ = build_pointer_type_for_mode (aggr_type, -+ TYPE_MODE (aggr_ptr_type), true); -+ aggr_ptr -+ = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, - get_name (base_name)); - break; - } -@@ -3152,7 +3150,7 @@ - while (orig_stmt); - } - -- add_referenced_var (vect_ptr); -+ add_referenced_var (aggr_ptr); - - /* Note: If the dataref is in an inner-loop nested in LOOP, and we are - vectorizing LOOP (i.e., outer-loop vectorization), we need to create two -@@ -3185,8 +3183,8 @@ - vp2 = vp1 + step - if () goto LOOP */ - -- /* (2) Calculate the initial address the vector-pointer, and set -- the vector-pointer to point to it before the loop. */ -+ /* (2) Calculate the initial address of the aggregate-pointer, and set -+ the aggregate-pointer to point to it before the loop. */ - - /* Create: (&(base[init_val+offset]) in the loop preheader. 
*/ - -@@ -3205,17 +3203,17 @@ - - *initial_address = new_temp; - -- /* Create: p = (vectype *) initial_base */ -+ /* Create: p = (aggr_type *) initial_base */ - if (TREE_CODE (new_temp) != SSA_NAME -- || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp))) -+ || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp))) - { -- vec_stmt = gimple_build_assign (vect_ptr, -- fold_convert (vect_ptr_type, new_temp)); -- vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt); -+ vec_stmt = gimple_build_assign (aggr_ptr, -+ fold_convert (aggr_ptr_type, new_temp)); -+ aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt); - /* Copy the points-to information if it exists. */ - if (DR_PTR_INFO (dr)) -- duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr)); -- gimple_assign_set_lhs (vec_stmt, vect_ptr_init); -+ duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr)); -+ gimple_assign_set_lhs (vec_stmt, aggr_ptr_init); - if (pe) - { - new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt); -@@ -3225,19 +3223,19 @@ - gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT); - } - else -- vect_ptr_init = new_temp; -+ aggr_ptr_init = new_temp; - -- /* (3) Handle the updating of the vector-pointer inside the loop. -+ /* (3) Handle the updating of the aggregate-pointer inside the loop. - This is needed when ONLY_INIT is false, and also when AT_LOOP is the - inner-loop nested in LOOP (during outer-loop vectorization). */ - - /* No update in loop is required. */ - if (only_init && (!loop_vinfo || at_loop == loop)) -- vptr = vect_ptr_init; -+ aptr = aggr_ptr_init; - else - { -- /* The step of the vector pointer is the Vector Size. */ -- tree step = TYPE_SIZE_UNIT (vectype); -+ /* The step of the aggregate pointer is the type size. */ -+ tree step = TYPE_SIZE_UNIT (aggr_type); - /* One exception to the above is when the scalar step of the load in - LOOP is zero. In this case the step here is also zero. 
*/ - if (*inv_p) -@@ -3247,9 +3245,9 @@ - - standard_iv_increment_position (loop, &incr_gsi, &insert_after); - -- create_iv (vect_ptr_init, -- fold_convert (vect_ptr_type, step), -- vect_ptr, loop, &incr_gsi, insert_after, -+ create_iv (aggr_ptr_init, -+ fold_convert (aggr_ptr_type, step), -+ aggr_ptr, loop, &incr_gsi, insert_after, - &indx_before_incr, &indx_after_incr); - incr = gsi_stmt (incr_gsi); - set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); -@@ -3263,14 +3261,14 @@ - if (ptr_incr) - *ptr_incr = incr; - -- vptr = indx_before_incr; -+ aptr = indx_before_incr; - } - - if (!nested_in_vect_loop || only_init) -- return vptr; -+ return aptr; - - -- /* (4) Handle the updating of the vector-pointer inside the inner-loop -+ /* (4) Handle the updating of the aggregate-pointer inside the inner-loop - nested in LOOP, if exists. */ - - gcc_assert (nested_in_vect_loop); -@@ -3278,7 +3276,7 @@ - { - standard_iv_increment_position (containing_loop, &incr_gsi, - &insert_after); -- create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr, -+ create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr, - containing_loop, &incr_gsi, insert_after, &indx_before_incr, - &indx_after_incr); - incr = gsi_stmt (incr_gsi); -@@ -3415,13 +3413,22 @@ - and FALSE otherwise. */ - - bool --vect_strided_store_supported (tree vectype) -+vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) - { - optab interleave_high_optab, interleave_low_optab; - enum machine_mode mode; - - mode = TYPE_MODE (vectype); - -+ /* vect_permute_store_chain requires the group size to be a power of two. 
*/ -+ if (exact_log2 (count) == -1) -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "the size of the group of strided accesses" -+ " is not a power of 2"); -+ return false; -+ } ++char dest[16]; ++ ++void aligned_dest (char *src) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* Expect a multi-word store for the main part of the copy, but subword ++ loads/stores for the remainder. */ ++ ++/* { dg-final { scan-assembler-times "stmia" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ +--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c ++++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++char src[16]; ++ ++void aligned_src (char *dest) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* Expect a multi-word load for the main part of the copy, but subword ++ loads/stores for the remainder. */ ++ ++/* { dg-final { scan-assembler-times "ldmia" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrh" 1 } } */ ++/* { dg-final { scan-assembler-times "strh" 1 } } */ ++/* { dg-final { scan-assembler-times "ldrb" 1 } } */ ++/* { dg-final { scan-assembler-times "strb" 1 } } */ +--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c ++++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_unaligned } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++char src[16]; ++char dest[16]; ++ ++void aligned_both (void) ++{ ++ memcpy (dest, src, 15); ++} ++ ++/* We know both src and dest to be aligned: expect multiword loads/stores. 
*/ ++ ++/* { dg-final { scan-assembler-times "ldmia" 1 } } */ ++/* { dg-final { scan-assembler-times "stmia" 1 } } */ +--- a/src/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv6" } */ + - /* Check that the operation is supported. */ - interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR, - vectype, optab_default); -@@ -3446,6 +3453,18 @@ - } ++unsigned char foo (unsigned char c) ++{ ++ return (c >= '0') && (c <= '9'); ++} ++ ++/* { dg-final { scan-assembler-not "uxtb" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c +@@ -127,13 +127,13 @@ + void test_ldst (float f[], double d[]) { + /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */ +- /* { dg-final { scan-assembler "flds.+ \\\[r0, #-1020\\\]" } } */ ++ /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ + /* { dg-final { scan-assembler "add.+ r0, #1024" } } */ +- /* { dg-final { scan-assembler "fsts.+ \\\[r0, #0\\\]\n" } } */ ++ /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\], #0\\\]\n" } } */ + f[256] = f[255] + f[-255]; -+/* Return TRUE if vec_store_lanes is available for COUNT vectors of -+ type VECTYPE. */ + /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */ +- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #-1016\\\]" } } */ ++ /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! 
arm_thumb2_ok } } } } } */ + /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */ + d[32] = d[127] + d[-127]; + } +--- a/src/gcc/testsuite/gcc.target/arm/wmul-10.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-10.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ + -+bool -+vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) ++unsigned long long ++foo (unsigned short a, unsigned short *b, unsigned short *c) +{ -+ return vect_lanes_optab_supported_p ("vec_store_lanes", -+ vec_store_lanes_optab, -+ vectype, count); ++ return (unsigned)a + (unsigned long long)*b * (unsigned long long)*c; +} + ++/* { dg-final { scan-assembler "umlal" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-11.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-11.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ + - /* Function vect_permute_store_chain. - - Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be -@@ -3507,7 +3526,7 @@ - I3: 4 12 20 28 5 13 21 30 - I4: 6 14 22 30 7 15 23 31. */ - --bool -+void - vect_permute_store_chain (VEC(tree,heap) *dr_chain, - unsigned int length, - gimple stmt, -@@ -3521,9 +3540,7 @@ - unsigned int j; - enum tree_code high_code, low_code; - -- /* Check that the operation is supported. 
*/ -- if (!vect_strided_store_supported (vectype)) -- return false; -+ gcc_assert (vect_strided_store_supported (vectype, length)); - - *result_chain = VEC_copy (tree, heap, dr_chain); - -@@ -3576,7 +3593,6 @@ - } - dr_chain = VEC_copy (tree, heap, *result_chain); - } -- return true; - } - - /* Function vect_setup_realignment -@@ -3746,8 +3762,9 @@ - - gcc_assert (!compute_in_loop); - vec_dest = vect_create_destination_var (scalar_dest, vectype); -- ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE, -- &init_addr, &inc, true, &inv_p); -+ ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load, -+ NULL_TREE, &init_addr, &inc, -+ true, &inv_p); - new_stmt = gimple_build_assign_with_ops - (BIT_AND_EXPR, NULL_TREE, ptr, - build_int_cst (TREE_TYPE (ptr), -@@ -3852,13 +3869,22 @@ - and FALSE otherwise. */ - - bool --vect_strided_load_supported (tree vectype) -+vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count) - { - optab perm_even_optab, perm_odd_optab; - enum machine_mode mode; - - mode = TYPE_MODE (vectype); - -+ /* vect_permute_load_chain requires the group size to be a power of two. */ -+ if (exact_log2 (count) == -1) -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "the size of the group of strided accesses" -+ " is not a power of 2"); -+ return false; -+ } ++long long ++foo (int *b) ++{ ++ return 10 * (long long)*b; ++} + - perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype, - optab_default); - if (!perm_even_optab) -@@ -3893,6 +3919,16 @@ - return true; - } - -+/* Return TRUE if vec_load_lanes is available for COUNT vectors of -+ type VECTYPE. 
*/ ++/* { dg-final { scan-assembler "smull" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-12.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-12.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ + -+bool -+vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) ++long long ++foo (int *b, int *c) +{ -+ return vect_lanes_optab_supported_p ("vec_load_lanes", -+ vec_load_lanes_optab, -+ vectype, count); ++ long long tmp = (long long)*b * *c; ++ return 10 + tmp; +} - - /* Function vect_permute_load_chain. - -@@ -3970,7 +4006,7 @@ - 3rd vec (E2): 2 6 10 14 18 22 26 30 - 4th vec (E4): 3 7 11 15 19 23 27 31. */ - --bool -+static void - vect_permute_load_chain (VEC(tree,heap) *dr_chain, - unsigned int length, - gimple stmt, -@@ -3983,9 +4019,7 @@ - int i; - unsigned int j; - -- /* Check that the operation is supported. */ -- if (!vect_strided_load_supported (vectype)) -- return false; -+ gcc_assert (vect_strided_load_supported (vectype, length)); - - *result_chain = VEC_copy (tree, heap, dr_chain); - for (i = 0; i < exact_log2 (length); i++) -@@ -4028,7 +4062,6 @@ - } - dr_chain = VEC_copy (tree, heap, *result_chain); - } -- return true; - } - - -@@ -4039,24 +4072,32 @@ - the scalar statements. - */ - --bool -+void - vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size, - gimple_stmt_iterator *gsi) - { -- stmt_vec_info stmt_info = vinfo_for_stmt (stmt); -- gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info); -- gimple next_stmt, new_stmt; - VEC(tree,heap) *result_chain = NULL; -- unsigned int i, gap_count; -- tree tmp_data_ref; - - /* DR_CHAIN contains input data-refs that are a part of the interleaving. - RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted - vectors, that are ready for vector computation. */ - result_chain = VEC_alloc (tree, heap, size); -- /* Permute. 
*/ -- if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain)) -- return false; -+ vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); -+ vect_record_strided_load_vectors (stmt, result_chain); -+ VEC_free (tree, heap, result_chain); ++ ++/* { dg-final { scan-assembler "smlal" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-13.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-13.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (int *a, int *b) ++{ ++ return *a + (long long)*b * 10; +} + -+/* RESULT_CHAIN contains the output of a group of strided loads that were -+ generated as part of the vectorization of STMT. Assign the statement -+ for each vector to the associated scalar statement. */ ++/* { dg-final { scan-assembler "smlal" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-5.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-5.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ + -+void -+vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain) ++long long ++foo (long long a, char *b, char *c) +{ -+ gimple first_stmt = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); -+ gimple next_stmt, new_stmt; -+ unsigned int i, gap_count; -+ tree tmp_data_ref; - - /* Put a permuted data-ref in the VECTORIZED_STMT field. - Since we scan the chain starting from it's first node, their order -@@ -4118,9 +4159,6 @@ - break; - } - } -- -- VEC_free (tree, heap, result_chain); -- return true; - } - - /* Function vect_force_dr_alignment_p. 
---- a/src/gcc/tree-vect-generic.c -+++ b/src/gcc/tree-vect-generic.c -@@ -552,7 +552,9 @@ - || code == VEC_UNPACK_LO_EXPR - || code == VEC_PACK_TRUNC_EXPR - || code == VEC_PACK_SAT_EXPR -- || code == VEC_PACK_FIX_TRUNC_EXPR) -+ || code == VEC_PACK_FIX_TRUNC_EXPR -+ || code == VEC_WIDEN_LSHIFT_HI_EXPR -+ || code == VEC_WIDEN_LSHIFT_LO_EXPR) - type = TREE_TYPE (rhs1); - - /* Optabs will try converting a negation into a subtraction, so ---- a/src/gcc/tree-vect-loop-manip.c -+++ b/src/gcc/tree-vect-loop-manip.c -@@ -1105,35 +1105,6 @@ - first_niters = PHI_RESULT (newphi); - } - -- --/* Remove dead assignments from loop NEW_LOOP. */ -- --static void --remove_dead_stmts_from_loop (struct loop *new_loop) --{ -- basic_block *bbs = get_loop_body (new_loop); -- unsigned i; -- for (i = 0; i < new_loop->num_nodes; ++i) -- { -- gimple_stmt_iterator gsi; -- for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);) -- { -- gimple stmt = gsi_stmt (gsi); -- if (is_gimple_assign (stmt) -- && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME -- && has_zero_uses (gimple_assign_lhs (stmt))) -- { -- gsi_remove (&gsi, true); -- release_defs (stmt); -- } -- else -- gsi_next (&gsi); -- } -- } -- free (bbs); --} -- -- - /* Function slpeel_tree_peel_loop_to_edge. - - Peel the first (last) iterations of LOOP into a new prolog (epilog) loop -@@ -1445,13 +1416,6 @@ - BITMAP_FREE (definitions); - delete_update_ssa (); - -- /* Remove all pattern statements from the loop copy. They will confuse -- the expander if DCE is disabled. -- ??? The pattern recognizer should be split into an analysis and -- a transformation phase that is then run only on the loop that is -- going to be transformed. 
*/ -- remove_dead_stmts_from_loop (new_loop); -- - adjust_vec_debug_stmts (); - - return new_loop; ---- a/src/gcc/tree-vect-loop.c -+++ b/src/gcc/tree-vect-loop.c -@@ -181,6 +181,8 @@ - stmt_vec_info stmt_info; - int i; - HOST_WIDE_INT dummy; -+ gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL; -+ bool analyze_pattern_stmt = false, pattern_def = false; - - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); -@@ -241,12 +243,20 @@ - } - } - -- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) -+ for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;) - { -- tree vf_vectype; -- gimple stmt = gsi_stmt (si); -- stmt_info = vinfo_for_stmt (stmt); -+ tree vf_vectype; - -+ if (analyze_pattern_stmt) -+ { -+ stmt = pattern_stmt; -+ analyze_pattern_stmt = false; -+ } -+ else -+ stmt = gsi_stmt (si); ++ return a + *b * *c; ++} + -+ stmt_info = vinfo_for_stmt (stmt); -+ - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "==> examining statement: "); -@@ -259,11 +269,57 @@ - if (!STMT_VINFO_RELEVANT_P (stmt_info) - && !STMT_VINFO_LIVE_P (stmt_info)) - { -- if (vect_print_dump_info (REPORT_DETAILS)) -- fprintf (vect_dump, "skip."); -- continue; -+ if (STMT_VINFO_IN_PATTERN_P (stmt_info) -+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) -+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) -+ { -+ stmt = pattern_stmt; -+ stmt_info = vinfo_for_stmt (pattern_stmt); -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ { -+ fprintf (vect_dump, "==> examining pattern statement: "); -+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); -+ } -+ } -+ else -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "skip."); -+ gsi_next (&si); -+ continue; -+ } - } - -+ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) -+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) -+ && 
(STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) -+ analyze_pattern_stmt = true; ++/* { dg-final { scan-assembler "umlal" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-6.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-6.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ + -+ /* If a pattern statement has a def stmt, analyze it too. */ -+ if (is_pattern_stmt_p (stmt_info) -+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) -+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) -+ { -+ if (pattern_def) -+ pattern_def = false; -+ else -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ { -+ fprintf (vect_dump, "==> examining pattern def stmt: "); -+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, -+ TDF_SLIM); -+ } ++long long ++foo (long long a, unsigned char *b, signed char *c) ++{ ++ return a + (long long)*b * (long long)*c; ++} + -+ pattern_def = true; -+ stmt = pattern_def_stmt; -+ stmt_info = vinfo_for_stmt (stmt); -+ } -+ } ++/* { dg-final { scan-assembler "smlal" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-7.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-7.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ + - if (gimple_get_lhs (stmt) == NULL_TREE) - { - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) -@@ -295,9 +351,7 @@ - } - else - { -- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) -- && !is_pattern_stmt_p (stmt_info)); -- -+ gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); - scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); - if (vect_print_dump_info (REPORT_DETAILS)) - { -@@ -369,6 +423,9 @@ - if (!vectorization_factor - || (nunits > vectorization_factor)) - vectorization_factor = nunits; ++unsigned long long ++foo (unsigned long long a, unsigned char *b, unsigned short *c) ++{ ++ return a + *b * *c; ++} + 
-+ if (!analyze_pattern_stmt && !pattern_def) -+ gsi_next (&si); - } - } - -@@ -817,25 +874,17 @@ - - if (stmt_info) - { -- /* Check if this is a "pattern stmt" (introduced by the -- vectorizer during the pattern recognition pass). */ -- bool remove_stmt_p = false; -- gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); -- if (orig_stmt) -- { -- stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); -- if (orig_stmt_info -- && STMT_VINFO_IN_PATTERN_P (orig_stmt_info)) -- remove_stmt_p = true; -- } -+ /* Check if this statement has a related "pattern stmt" -+ (introduced by the vectorizer during the pattern recognition -+ pass). Free pattern's stmt_vec_info. */ -+ if (STMT_VINFO_IN_PATTERN_P (stmt_info) -+ && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info))) -+ free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info)); - - /* Free stmt_vec_info. */ - free_stmt_vec_info (stmt); -- -- /* Remove dead "pattern stmts". */ -- if (remove_stmt_p) -- gsi_remove (&si, true); - } ++/* { dg-final { scan-assembler "umlal" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-8.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-8.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, int *b, int *c) ++{ ++ return a + (long long)*b * *c; ++} ++ ++/* { dg-final { scan-assembler "smlal" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-9.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-9.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++long long ++foo (long long a, short *b, char *c) ++{ ++ return a + *b * *c; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ ++ ++struct bf ++{ ++ int a : 3; ++ int b : 15; ++ int c : 3; ++}; ++ ++long long ++foo (long long 
a, struct bf b, struct bf c) ++{ ++ return a + b.b * c.b; ++} ++ ++/* { dg-final { scan-assembler "smlalbb" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c ++++ b/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv7-a" } */ + - gsi_next (&si); - } - } -@@ -1409,7 +1458,7 @@ - - vect_analyze_scalar_cycles (loop_vinfo); - -- vect_pattern_recog (loop_vinfo); -+ vect_pattern_recog (loop_vinfo, NULL); - - /* Data-flow analysis to detect stmts that do not need to be vectorized. */ - -@@ -3233,8 +3282,8 @@ - - /* Get the loop-entry arguments. */ - if (slp_node) -- vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs, -- NULL, reduc_index); -+ vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs, -+ NULL, slp_node, reduc_index); - else - { - vec_initial_defs = VEC_alloc (tree, heap, 1); -@@ -3959,7 +4008,7 @@ - VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL; - VEC (gimple, heap) *phis = NULL; - int vec_num; -- tree def0, def1, tem; -+ tree def0, def1, tem, op0, op1 = NULL_TREE; - - if (nested_in_vect_loop_p (loop, stmt)) - { -@@ -4038,6 +4087,9 @@ - gcc_unreachable (); - } - -+ if (code == COND_EXPR && slp_node) -+ return false; ++struct bf ++{ ++ int a : 3; ++ unsigned int b : 15; ++ int c : 3; ++}; + - scalar_dest = gimple_assign_lhs (stmt); - scalar_type = TREE_TYPE (scalar_dest); - if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type) -@@ -4112,7 +4164,7 @@ - - if (code == COND_EXPR) - { -- if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0)) -+ if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "unsupported condition in reduction"); -@@ -4267,6 +4319,25 @@ - return false; - } - -+ /* In case of widenning multiplication by a constant, we update the type -+ of the constant to be the type of 
the other operand. We check that the -+ constant fits the type in the pattern recognition pass. */ -+ if (code == DOT_PROD_EXPR -+ && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1]))) -+ { -+ if (TREE_CODE (ops[0]) == INTEGER_CST) -+ ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]); -+ else if (TREE_CODE (ops[1]) == INTEGER_CST) -+ ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]); -+ else -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "invalid types in dot-prod"); ++long long ++foo (long long a, struct bf b, struct bf c) ++{ ++ return a + b.b * c.c; ++} + -+ return false; -+ } -+ } ++/* { dg-final { scan-assembler "smlalbb" } } */ +--- a/src/gcc/testsuite/gcc.target/arm/xor-and.c ++++ b/src/gcc/testsuite/gcc.target/arm/xor-and.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -march=armv6" } */ + - if (!vec_stmt) /* transformation not required. */ - { - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; -@@ -4365,7 +4436,7 @@ - gcc_assert (!slp_node); - vectorizable_condition (stmt, gsi, vec_stmt, - PHI_RESULT (VEC_index (gimple, phis, 0)), -- reduc_index); -+ reduc_index, NULL); - /* Multiple types are not supported for condition. */ - break; - } -@@ -4373,8 +4444,6 @@ - /* Handle uses. 
*/ - if (j == 0) - { -- tree op0, op1 = NULL_TREE; -- - op0 = ops[!reduc_index]; - if (op_type == ternary_op) - { -@@ -4385,8 +4454,8 @@ - } - - if (slp_node) -- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1, -- -1); -+ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, -+ slp_node, -1); - else - { - loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index], -@@ -4404,11 +4473,19 @@ - { - if (!slp_node) - { -- enum vect_def_type dt = vect_unknown_def_type; /* Dummy */ -- loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0); -+ enum vect_def_type dt; -+ gimple dummy_stmt; -+ tree dummy; ++unsigned short foo (unsigned short x) ++{ ++ x ^= 0x4002; ++ x >>= 1; ++ x |= 0x8000; ++ return x; ++} + -+ vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL, -+ &dummy_stmt, &dummy, &dt); -+ loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, -+ loop_vec_def0); - VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0); - if (op_type == ternary_op) - { -+ vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt, -+ &dummy, &dt); - loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, - loop_vec_def1); - VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1); -@@ -4713,6 +4790,8 @@ - tree cond_expr = NULL_TREE; - gimple_seq cond_expr_stmt_list = NULL; - bool do_peeling_for_loop_bound; -+ gimple stmt, pattern_stmt, pattern_def_stmt; -+ bool transform_pattern_stmt = false, pattern_def = false; - - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "=== vec_transform_loop ==="); -@@ -4800,11 +4879,19 @@ - } - } - -- for (si = gsi_start_bb (bb); !gsi_end_p (si);) -+ pattern_stmt = NULL; -+ for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;) - { -- gimple stmt = gsi_stmt (si); - bool is_store; - -+ if (transform_pattern_stmt) -+ { -+ stmt = pattern_stmt; -+ transform_pattern_stmt = false; -+ } -+ else -+ stmt = gsi_stmt (si); ++/* { dg-final { scan-assembler "orr" } } */ ++/* { dg-final { scan-assembler-not 
"mvn" } } */ ++/* { dg-final { scan-assembler-not "uxth" } } */ +--- a/src/gcc/testsuite/gcc.target/i386/pr51987.c ++++ b/src/gcc/testsuite/gcc.target/i386/pr51987.c +@@ -0,0 +1,33 @@ ++/* PR tree-optimization/51987 */ ++/* { dg-do run { target { ! { ilp32 } } } } */ ++/* { dg-options "-O3" } */ + - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "------>vectorizing statement: "); -@@ -4827,14 +4914,54 @@ - - if (!STMT_VINFO_RELEVANT_P (stmt_info) - && !STMT_VINFO_LIVE_P (stmt_info)) -- { -- gsi_next (&si); -- continue; -+ { -+ if (STMT_VINFO_IN_PATTERN_P (stmt_info) -+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) -+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) -+ { -+ stmt = pattern_stmt; -+ stmt_info = vinfo_for_stmt (stmt); -+ } -+ else -+ { -+ gsi_next (&si); -+ continue; -+ } - } -+ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) -+ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) -+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) -+ transform_pattern_stmt = true; ++extern void abort (void); ++union U { unsigned long long l; struct { unsigned int l, h; } i; }; + -+ /* If pattern statement has a def stmt, vectorize it too. 
*/ -+ if (is_pattern_stmt_p (stmt_info) -+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) -+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) -+ { -+ if (pattern_def) -+ pattern_def = false; -+ else -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ { -+ fprintf (vect_dump, "==> vectorizing pattern def" -+ " stmt: "); -+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, -+ TDF_SLIM); -+ } ++__attribute__((noinline, noclone)) void ++foo (char *x, char *y) ++{ ++ int i; ++ for (i = 0; i < 64; i++) ++ { ++ union U u; ++ asm ("movl %1, %k0; salq $32, %0" : "=r" (u.l) : "r" (i)); ++ x[i] = u.i.h; ++ union U v; ++ asm ("movl %1, %k0; salq $32, %0" : "=r" (v.l) : "r" (i)); ++ y[i] = v.i.h; ++ } ++} + -+ pattern_def = true; -+ stmt = pattern_def_stmt; -+ stmt_info = vinfo_for_stmt (stmt); -+ } -+ } - - gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); -- nunits = -- (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); -+ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( -+ STMT_VINFO_VECTYPE (stmt_info)); - if (!STMT_SLP_TYPE (stmt_info) - && nunits != (unsigned int) vectorization_factor - && vect_print_dump_info (REPORT_DETAILS)) -@@ -4859,8 +4986,9 @@ - /* Hybrid SLP stmts must be vectorized in addition to SLP. */ - if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) - { -- gsi_next (&si); -- continue; -+ if (!transform_pattern_stmt && !pattern_def) -+ gsi_next (&si); -+ continue; - } - } - -@@ -4879,7 +5007,7 @@ - the chain. 
*/ - vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); - gsi_remove (&si, true); -- continue; -+ continue; - } - else - { -@@ -4889,7 +5017,9 @@ - continue; - } - } -- gsi_next (&si); ++int ++main () ++{ ++ char a[64], b[64]; ++ int i; ++ foo (a, b); ++ for (i = 0; i < 64; i++) ++ if (a[i] != i || b[i] != i) ++ abort (); ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/sparc/ultrasp12.c ++++ b/src/gcc/testsuite/gcc.target/sparc/ultrasp12.c +@@ -0,0 +1,64 @@ ++/* PR rtl-optimization/48830 */ ++/* Testcase by Hans-Peter Nilsson */ + -+ if (!transform_pattern_stmt && !pattern_def) -+ gsi_next (&si); - } /* stmts in BB */ - } /* BBs in loop */ - ---- a/src/gcc/tree-vect-patterns.c -+++ b/src/gcc/tree-vect-patterns.c -@@ -38,33 +38,40 @@ - #include "recog.h" - #include "diagnostic-core.h" - --/* Function prototypes */ --static void vect_pattern_recog_1 -- (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator); --static bool widened_name_p (tree, gimple, tree *, gimple *); -- - /* Pattern recognition functions */ --static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *); --static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *); --static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *); --static gimple vect_recog_pow_pattern (gimple, tree *, tree *); -+static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, -+ tree *); -+static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, -+ tree *); -+static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, -+ tree *); -+static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); -+static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, -+ tree *); -+static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, -+ tree *, tree *); -+static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, -+ tree *, tree *); - static vect_recog_func_ptr 
vect_vect_recog_func_ptrs[NUM_PATTERNS] = { - vect_recog_widen_mult_pattern, - vect_recog_widen_sum_pattern, - vect_recog_dot_prod_pattern, -- vect_recog_pow_pattern}; -- -+ vect_recog_pow_pattern, -+ vect_recog_over_widening_pattern, -+ vect_recog_widen_shift_pattern, -+ vect_recog_mixed_size_cond_pattern}; - --/* Function widened_name_p - -- Check whether NAME, an ssa-name used in USE_STMT, -- is a result of a type-promotion, such that: -- DEF_STMT: NAME = NOP (name0) -- where the type of name0 (HALF_TYPE) is smaller than the type of NAME. --*/ -+/* Check whether NAME, an ssa-name used in USE_STMT, -+ is a result of a type promotion or demotion, such that: -+ DEF_STMT: NAME = NOP (name0) -+ where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME. -+ If CHECK_SIGN is TRUE, check that either both types are signed or both are -+ unsigned. */ - - static bool --widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt) -+type_conversion_p (tree name, gimple use_stmt, bool check_sign, -+ tree *orig_type, gimple *def_stmt, bool *promotion) - { - tree dummy; - gimple dummy_gimple; -@@ -74,35 +81,43 @@ - tree oprnd0; - enum vect_def_type dt; - tree def; -+ bb_vec_info bb_vinfo; - - stmt_vinfo = vinfo_for_stmt (use_stmt); - loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); -+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); - -- if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt)) -+ if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt)) - return false; - - if (dt != vect_internal_def - && dt != vect_external_def && dt != vect_constant_def) - return false; - -- if (! 
*def_stmt) -+ if (!*def_stmt) - return false; - - if (!is_gimple_assign (*def_stmt)) - return false; - -- if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR) -+ if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt))) - return false; - - oprnd0 = gimple_assign_rhs1 (*def_stmt); - -- *half_type = TREE_TYPE (oprnd0); -- if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) -- || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) -- || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) -+ *orig_type = TREE_TYPE (oprnd0); -+ if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type) -+ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign)) -+ return false; ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */ + -+ if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2)) -+ *promotion = true; -+ else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2)) -+ *promotion = false; -+ else - return false; - -- if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy, -+ if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, - &dt)) - return false; - -@@ -145,9 +160,9 @@ - - Input: - -- * LAST_STMT: A stmt from which the pattern search begins. In the example, -- when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be -- detected. -+ * STMTS: Contains a stmt from which the pattern search begins. In the -+ example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} -+ will be detected. - - Output: - -@@ -168,9 +183,10 @@ - inner-loop nested in an outer-loop that us being vectorized). 
*/ - - static gimple --vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out) -+vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, -+ tree *type_out) - { -- gimple stmt; -+ gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); - tree oprnd0, oprnd1; - tree oprnd00, oprnd01; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); -@@ -178,8 +194,14 @@ - gimple pattern_stmt; - tree prod_type; - loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); -- struct loop *loop = LOOP_VINFO_LOOP (loop_info); -+ struct loop *loop; - tree var, rhs; -+ bool promotion; ++typedef unsigned char uint8_t; ++typedef unsigned int uint32_t; ++typedef unsigned long int uint64_t; ++typedef unsigned long int uintmax_t; ++typedef unsigned char rc_vec_t __attribute__((__vector_size__(8))); ++typedef short rc_svec_type_ __attribute__((__vector_size__(8))); ++typedef unsigned char rc_vec4_type_ __attribute__((__vector_size__(4))); + -+ if (!loop_info) -+ return NULL; ++void ++rc_stat_xsum_acc(const uint8_t *__restrict src1, int src1_dim, ++ const uint8_t *__restrict src2, int src2_dim, ++ int len, int height, uintmax_t sum[5]) ++{ ++ uint32_t s1 = 0; ++ uint32_t s2 = 0; ++ uintmax_t s11 = 0; ++ uintmax_t s22 = 0; ++ uintmax_t s12 = 0; ++ int full = len / ((1024) < (1024) ? (1024) : (1024)); ++ int rem = len % ((1024) < (1024) ? 
(1024) : (1024)); ++ int rem1 = rem / 1; ++ int y; ++ unsigned int rc_gsr_scale_ __attribute__ ((__unused__)) = 7; unsigned int rc_gsr_align_ __attribute__ ((__unused__)) = 4; unsigned int rc_gsr_set_ __attribute__ ((__unused__)) = 0; register unsigned int rc_gsr_fakedep_ __attribute__ ((__unused__)) = 0; unsigned int rc_gsr_ldinit_ __attribute__ ((__unused__)) = 0; ++ for (y = 0; y < height; y++) { ++ rc_vec_t a1, a2, a11, a22, a12; ++ int i1 = (y)*(src1_dim); ++ int i2 = (y)*(src2_dim); ++ int x; ++ ((a1) = ((rc_vec_t) {0})); ++ ((a2) = ((rc_vec_t) {0})); ++ ((a11) = ((rc_vec_t) {0})); ++ ((a22) = ((rc_vec_t) {0})); ++ ((a12) = ((rc_vec_t) {0})); ++ for (x = 0; x < full; x++) { ++ int k; ++ for (k = 0; k < ((1024) < (1024) ? (1024) : (1024)) / ++ 1; k++) ++ { ++ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = 
(s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = 
(rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, 
s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = 
__builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; 
})(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] 
"r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), 
(((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); + -+ loop = LOOP_VINFO_LOOP (loop_info); - - if (!is_gimple_assign (last_stmt)) - return NULL; -@@ -238,7 +260,9 @@ - return NULL; - stmt = last_stmt; - -- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt)) -+ if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt, -+ &promotion) -+ && promotion) - { - stmt = def_stmt; - oprnd0 = gimple_assign_rhs1 (stmt); -@@ -293,10 +317,14 @@ - if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) - || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) - return NULL; -- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) -+ if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt, -+ &promotion) -+ || !promotion) - return NULL; - oprnd00 = gimple_assign_rhs1 (def_stmt); -- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt)) -+ if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt, -+ &promotion) -+ || !promotion) - return NULL; - oprnd01 = gimple_assign_rhs1 (def_stmt); - if (!types_compatible_p (half_type0, half_type1)) -@@ -327,6 +355,100 @@ - return pattern_stmt; ++ } ++ do { uint32_t t1, t2, t11, t22, 
t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ } ++ for (x = 0; x < rem1; x++) { ++ do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { 
rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : 
[depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist 
((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 
3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ 
+ mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), 
(0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, 
zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0); ++ } ++ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; 
})(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ ++ do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); 
(accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0); ++ } ++ sum[0] = s1; ++ sum[1] = s2; ++ sum[2] = s11; ++ sum[3] = s22; ++ sum[4] = s12; ++ ; ++} +--- a/src/gcc/testsuite/gfortran.dg/vect/pr19049.f90 ++++ b/src/gcc/testsuite/gfortran.dg/vect/pr19049.f90 +@@ -19,6 +19,7 @@ + end + + ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } +-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } } ++! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } } ++! 
{ dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } } + ! { dg-final { cleanup-tree-dump "vect" } } + +--- a/src/gcc/testsuite/lib/target-supports.exp ++++ b/src/gcc/testsuite/lib/target-supports.exp +@@ -1894,6 +1894,18 @@ + }] } ++# Return 1 if this is an ARM target that supports unaligned word/halfword ++# load/store instructions. + -+/* Handle widening operation by a constant. At the moment we support MULT_EXPR -+ and LSHIFT_EXPR. -+ -+ For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR -+ we check that CONST_OPRND is less or equal to the size of HALF_TYPE. -+ -+ Otherwise, if the type of the result (TYPE) is at least 4 times bigger than -+ HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) -+ that satisfies the above restrictions, we can perform a widening opeartion -+ from the intermediate type to TYPE and replace a_T = (TYPE) a_t; -+ with a_it = (interm_type) a_t; */ ++proc check_effective_target_arm_unaligned { } { ++ return [check_no_compiler_messages arm_unaligned assembly { ++ #ifndef __ARM_FEATURE_UNALIGNED ++ #error no unaligned support ++ #endif ++ int i; ++ }] ++} + -+static bool -+vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, -+ tree const_oprnd, tree *oprnd, -+ VEC (gimple, heap) **stmts, tree type, -+ tree *half_type, gimple def_stmt) -+{ -+ tree new_type, new_oprnd, tmp; -+ gimple new_stmt; -+ loop_vec_info loop_vinfo; -+ struct loop *loop = NULL; -+ bb_vec_info bb_vinfo; -+ stmt_vec_info stmt_vinfo; + # Add the options needed for NEON. We need either -mfloat-abi=softfp + # or -mfloat-abi=hard, but if one is already specified by the + # multilib, use it. 
Similarly, if a -mfpu option already enables +@@ -1988,6 +2000,47 @@ + check_effective_target_arm_fp16_ok_nocache] + } + ++# Creates a series of routines that return 1 if the given architecture ++# can be selected and a routine to give the flags to select that architecture ++# Note: Extra flags may be added to disable options from newer compilers ++# (Thumb in particular - but others may be added in the future) ++# Usage: /* { dg-require-effective-target arm_arch_v5_ok } */ ++# /* { dg-add-options arm_arch_v5 } */ ++foreach { armfunc armflag armdef } { v5 "-march=armv5 -marm" __ARM_ARCH_5__ ++ v6 "-march=armv6" __ARM_ARCH_6__ ++ v6k "-march=armv6k" __ARM_ARCH_6K__ ++ v7a "-march=armv7-a" __ARM_ARCH_7A__ } { ++ eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { ++ proc check_effective_target_arm_arch_FUNC_ok { } { ++ if { [ string match "*-marm*" "FLAG" ] && ++ ![check_effective_target_arm_arm_ok] } { ++ return 0 ++ } ++ return [check_no_compiler_messages arm_arch_FUNC_ok assembly { ++ #if !defined (DEF) ++ #error FOO ++ #endif ++ } "FLAG" ] ++ } + -+ stmt_vinfo = vinfo_for_stmt (stmt); -+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); -+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); -+ if (loop_vinfo) -+ loop = LOOP_VINFO_LOOP (loop_vinfo); ++ proc add_options_for_arm_arch_FUNC { flags } { ++ return "$flags FLAG" ++ } ++ }] ++} + -+ if (code != MULT_EXPR && code != LSHIFT_EXPR) -+ return false; ++# Return 1 if this is an ARM target where -marm causes ARM to be ++# used (not Thumb) + -+ if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) -+ || (code == LSHIFT_EXPR -+ && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) -+ != 1)) -+ && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) -+ { -+ /* CONST_OPRND is a constant of HALF_TYPE. 
*/ -+ *oprnd = gimple_assign_rhs1 (def_stmt); -+ return true; -+ } ++proc check_effective_target_arm_arm_ok { } { ++ return [check_no_compiler_messages arm_arm_ok assembly { ++ #if !defined (__arm__) || defined (__thumb__) || defined (__thumb2__) ++ #error FOO ++ #endif ++ } "-marm"] ++} + -+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) -+ || !gimple_bb (def_stmt) -+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) -+ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo) -+ && gimple_code (def_stmt) != GIMPLE_PHI) -+ || !vinfo_for_stmt (def_stmt)) -+ return false; + -+ /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for -+ a type 2 times bigger than HALF_TYPE. */ -+ new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, -+ TYPE_UNSIGNED (type)); -+ if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) -+ || (code == LSHIFT_EXPR -+ && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) -+ return false; + # Return 1 is this is an ARM target where -mthumb causes Thumb-1 to be + # used. + +@@ -2338,6 +2391,26 @@ + } + + ++# Return 1 if the target supports hardware vector shift operation for char. + -+ /* Use NEW_TYPE for widening operation. */ -+ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) -+ { -+ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); -+ /* Check if the already created pattern stmt is what we need. 
*/ -+ if (!is_gimple_assign (new_stmt) -+ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR -+ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) -+ return false; ++proc check_effective_target_vect_shift_char { } { ++ global et_vect_shift_char_saved + -+ VEC_safe_push (gimple, heap, *stmts, def_stmt); -+ *oprnd = gimple_assign_lhs (new_stmt); -+ } -+ else -+ { -+ /* Create a_T = (NEW_TYPE) a_t; */ -+ *oprnd = gimple_assign_rhs1 (def_stmt); -+ tmp = create_tmp_var (new_type, NULL); -+ add_referenced_var (tmp); -+ new_oprnd = make_ssa_name (tmp, NULL); -+ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, -+ NULL_TREE); -+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; -+ VEC_safe_push (gimple, heap, *stmts, def_stmt); -+ *oprnd = new_oprnd; ++ if [info exists et_vect_shift_char_saved] { ++ verbose "check_effective_target_vect_shift_char: using cached result" 2 ++ } else { ++ set et_vect_shift_char_saved 0 ++ if { ([istarget powerpc*-*-*] ++ && ![istarget powerpc-*-linux*paired*]) ++ || [check_effective_target_arm32] } { ++ set et_vect_shift_char_saved 1 ++ } + } + -+ *half_type = new_type; -+ return true; ++ verbose "check_effective_target_vect_shift_char: returning $et_vect_shift_char_saved" 2 ++ return $et_vect_shift_char_saved +} + - /* Function vect_recog_widen_mult_pattern - - Try to find the following pattern: -@@ -342,37 +464,80 @@ - - where type 'TYPE' is at least double the size of type 'type'. - -- Input: -+ Also detect unsgigned cases: + # Return 1 if the target supports hardware vectors of long, 0 otherwise. + # + # This can change for different subtargets so do not cache the result. 
+@@ -2673,7 +2746,8 @@ + } else { + set et_vect_widen_mult_qi_to_hi_saved 0 + } +- if { [istarget powerpc*-*-*] } { ++ if { [istarget powerpc*-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { + set et_vect_widen_mult_qi_to_hi_saved 1 + } + } +@@ -2706,7 +2780,8 @@ + || [istarget spu-*-*] + || [istarget ia64-*-*] + || [istarget i?86-*-*] +- || [istarget x86_64-*-*] } { ++ || [istarget x86_64-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { + set et_vect_widen_mult_hi_to_si_saved 1 + } + } +@@ -2715,6 +2790,72 @@ + } -- * LAST_STMT: A stmt from which the pattern search begins. In the example, -- when this function is called with S5, the pattern {S3,S4,S5} is be detected. -+ unsigned type a_t, b_t; -+ unsigned TYPE u_prod_T; -+ TYPE a_T, b_T, prod_T; + # Return 1 if the target plus current options supports a vector ++# widening multiplication of *char* args into *short* result, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. 
+ -+ S1 a_t = ; -+ S2 b_t = ; -+ S3 a_T = (TYPE) a_t; -+ S4 b_T = (TYPE) b_t; -+ S5 prod_T = a_T * b_T; -+ S6 u_prod_T = (unsigned TYPE) prod_T; ++proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } { ++ global et_vect_widen_mult_qi_to_hi_pattern + -+ and multiplication by constants: ++ if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] { ++ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2 ++ } else { ++ set et_vect_widen_mult_qi_to_hi_pattern_saved 0 ++ if { [istarget powerpc*-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { ++ set et_vect_widen_mult_qi_to_hi_pattern_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2 ++ return $et_vect_widen_mult_qi_to_hi_pattern_saved ++} + -+ type a_t; -+ TYPE a_T, prod_T; ++# Return 1 if the target plus current options supports a vector ++# widening multiplication of *short* args into *int* result, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. + -+ S1 a_t = ; -+ S3 a_T = (TYPE) a_t; -+ S5 prod_T = a_T * CONST; ++proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } { ++ global et_vect_widen_mult_hi_to_si_pattern + -+ A special case of multiplication by constants is when 'TYPE' is 4 times -+ bigger than 'type', but CONST fits an intermediate type 2 times smaller -+ than 'TYPE'. 
In that case we create an additional pattern stmt for S3 -+ to create a variable of the intermediate type, and perform widen-mult -+ on the intermediate type as well: ++ if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] { ++ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2 ++ } else { ++ set et_vect_widen_mult_hi_to_si_pattern_saved 0 ++ if { [istarget powerpc*-*-*] ++ || [istarget spu-*-*] ++ || [istarget ia64-*-*] ++ || [istarget i?86-*-*] ++ || [istarget x86_64-*-*] ++ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { ++ set et_vect_widen_mult_hi_to_si_pattern_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2 ++ return $et_vect_widen_mult_hi_to_si_pattern_saved ++} + -+ type a_t; -+ interm_type a_it; -+ TYPE a_T, prod_T, prod_T'; ++# Return 1 if the target plus current options supports a vector ++# widening shift, 0 otherwise. ++# ++# This won't change for different subtargets so cache the result. + -+ S1 a_t = ; -+ S3 a_T = (TYPE) a_t; -+ '--> a_it = (interm_type) a_t; -+ S5 prod_T = a_T * CONST; -+ '--> prod_T' = a_it w* CONST; ++proc check_effective_target_vect_widen_shift { } { ++ global et_vect_widen_shift_saved + -+ Input/Output: ++ if [info exists et_vect_shift_saved] { ++ verbose "check_effective_target_vect_widen_shift: using cached result" 2 ++ } else { ++ set et_vect_widen_shift_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect_widen_shift_saved 1 ++ } ++ } ++ verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2 ++ return $et_vect_widen_shift_saved ++} + -+ * STMTS: Contains a stmt from which the pattern search begins. In the -+ example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} -+ is detected. In case of unsigned widen-mult, the original stmt (S5) is -+ replaced with S6 in STMTS. 
In case of multiplication by a constant -+ of an intermediate type (the last case above), STMTS also contains S3 -+ (inserted before S5). - - Output: - - * TYPE_IN: The type of the input arguments to the pattern. - -- * TYPE_OUT: The type of the output of this pattern. -+ * TYPE_OUT: The type of the output of this pattern. - - * Return value: A new stmt that will be used to replace the sequence of -- stmts that constitute the pattern. In this case it will be: -+ stmts that constitute the pattern. In this case it will be: - WIDEN_MULT - */ ++# Return 1 if the target plus current options supports a vector + # dot-product of signed chars, 0 otherwise. + # + # This won't change for different subtargets so cache the result. +@@ -3170,29 +3311,6 @@ + return $et_vect_extract_even_odd_saved + } - static gimple --vect_recog_widen_mult_pattern (gimple last_stmt, -- tree *type_in, -- tree *type_out) -+vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, -+ tree *type_in, tree *type_out) - { -+ gimple last_stmt = VEC_pop (gimple, *stmts); - gimple def_stmt0, def_stmt1; - tree oprnd0, oprnd1; - tree type, half_type0, half_type1; - gimple pattern_stmt; -- tree vectype, vectype_out; -+ tree vectype, vectype_out = NULL_TREE; - tree dummy; - tree var; - enum tree_code dummy_code; - int dummy_int; - VEC (tree, heap) *dummy_vec; -+ bool op1_ok, promotion; +-# Return 1 if the target supports vector even/odd elements extraction of +-# vectors with SImode elements or larger, 0 otherwise. 
+- +-proc check_effective_target_vect_extract_even_odd_wide { } { +- global et_vect_extract_even_odd_wide_saved +- +- if [info exists et_vect_extract_even_odd_wide_saved] { +- verbose "check_effective_target_vect_extract_even_odd_wide: using cached result" 2 +- } else { +- set et_vect_extract_even_odd_wide_saved 0 +- if { [istarget powerpc*-*-*] +- || [istarget i?86-*-*] +- || [istarget x86_64-*-*] +- || [istarget ia64-*-*] +- || [istarget spu-*-*] } { +- set et_vect_extract_even_odd_wide_saved 1 +- } +- } +- +- verbose "check_effective_target_vect_extract_even_wide_odd: returning $et_vect_extract_even_odd_wide_saved" 2 +- return $et_vect_extract_even_odd_wide_saved +-} +- + # Return 1 if the target supports vector interleaving, 0 otherwise. - if (!is_gimple_assign (last_stmt)) - return NULL; -@@ -391,15 +556,68 @@ - || !types_compatible_p (TREE_TYPE (oprnd1), type)) - return NULL; + proc check_effective_target_vect_interleave { } { +@@ -3215,41 +3333,66 @@ + return $et_vect_interleave_saved + } -- /* Check argument 0 */ -- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) -+ /* Check argument 0. */ -+ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, -+ &promotion) -+ || !promotion) - return NULL; -- oprnd0 = gimple_assign_rhs1 (def_stmt0); -+ /* Check argument 1. */ -+ op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1, -+ &def_stmt1, &promotion); -+ if (op1_ok && promotion) -+ { -+ oprnd0 = gimple_assign_rhs1 (def_stmt0); -+ oprnd1 = gimple_assign_rhs1 (def_stmt1); -+ } -+ else -+ { -+ if (TREE_CODE (oprnd1) == INTEGER_CST -+ && TREE_CODE (half_type0) == INTEGER_TYPE -+ && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, -+ &oprnd0, stmts, type, -+ &half_type0, def_stmt0)) -+ half_type1 = half_type0; -+ else -+ return NULL; -+ } +-# Return 1 if the target supports vector interleaving and extract even/odd, 0 otherwise. 
+-proc check_effective_target_vect_strided { } { +- global et_vect_strided_saved ++foreach N {2 3 4 8} { ++ eval [string map [list N $N] { ++ # Return 1 if the target supports 2-vector interleaving ++ proc check_effective_target_vect_stridedN { } { ++ global et_vect_stridedN_saved -- /* Check argument 1 */ -- if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) -- return NULL; -- oprnd1 = gimple_assign_rhs1 (def_stmt1); -+ /* Handle unsigned case. Look for -+ S6 u_prod_T = (unsigned TYPE) prod_T; -+ Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ -+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) -+ { -+ tree lhs = gimple_assign_lhs (last_stmt), use_lhs; -+ imm_use_iterator imm_iter; -+ use_operand_p use_p; -+ int nuses = 0; -+ gimple use_stmt = NULL; -+ tree use_type; -+ -+ if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) -+ return NULL; +- if [info exists et_vect_strided_saved] { +- verbose "check_effective_target_vect_strided: using cached result" 2 ++ if [info exists et_vect_stridedN_saved] { ++ verbose "check_effective_target_vect_stridedN: using cached result" 2 ++ } else { ++ set et_vect_stridedN_saved 0 ++ if { (N & -N) == N ++ && [check_effective_target_vect_interleave] ++ && [check_effective_target_vect_extract_even_odd] } { ++ set et_vect_stridedN_saved 1 ++ } ++ if { [istarget arm*-*-*] && N >= 2 && N <= 4 } { ++ set et_vect_stridedN_saved 1 ++ } ++ } + -+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) -+ { -+ if (is_gimple_debug (USE_STMT (use_p))) -+ continue; -+ use_stmt = USE_STMT (use_p); -+ nuses++; -+ } ++ verbose "check_effective_target_vect_stridedN: returning $et_vect_stridedN_saved" 2 ++ return $et_vect_stridedN_saved ++ } ++ }] ++} + -+ if (nuses != 1 || !is_gimple_assign (use_stmt) -+ || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) -+ return NULL; ++# Return 1 if the target supports multiple vector sizes + -+ use_lhs = gimple_assign_lhs (use_stmt); -+ use_type = TREE_TYPE (use_lhs); -+ if (!INTEGRAL_TYPE_P 
(use_type) -+ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) -+ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) -+ return NULL; ++proc check_effective_target_vect_multiple_sizes { } { ++ global et_vect_multiple_sizes_saved + -+ type = use_type; -+ last_stmt = use_stmt; -+ } - - if (!types_compatible_p (half_type0, half_type1)) - return NULL; -@@ -431,6 +649,7 @@ - if (vect_print_dump_info (REPORT_DETAILS)) - print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ if [info exists et_vect_multiple_sizes_saved] { ++ verbose "check_effective_target_vect_multiple_sizes: using cached result" 2 + } else { +- set et_vect_strided_saved 0 +- if { [check_effective_target_vect_interleave] +- && [check_effective_target_vect_extract_even_odd] } { +- set et_vect_strided_saved 1 ++ set et_vect_multiple_sizes_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect_multiple_sizes_saved 1 + } + } -+ VEC_safe_push (gimple, heap, *stmts, last_stmt); - return pattern_stmt; +- verbose "check_effective_target_vect_strided: returning $et_vect_strided_saved" 2 +- return $et_vect_strided_saved ++ verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2 ++ return $et_vect_multiple_sizes_saved } -@@ -462,8 +681,9 @@ - */ - - static gimple --vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out) -+vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) - { -+ gimple last_stmt = VEC_index (gimple, *stmts, 0); - tree fn, base, exp = NULL; - gimple stmt; - tree var; -@@ -574,16 +794,24 @@ - inner-loop nested in an outer-loop that us being vectorized). 
*/ - - static gimple --vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out) -+vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, -+ tree *type_out) - { -+ gimple last_stmt = VEC_index (gimple, *stmts, 0); - gimple stmt; - tree oprnd0, oprnd1; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); - tree type, half_type; - gimple pattern_stmt; - loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); -- struct loop *loop = LOOP_VINFO_LOOP (loop_info); -+ struct loop *loop; - tree var; -+ bool promotion; -+ -+ if (!loop_info) -+ return NULL; +-# Return 1 if the target supports vector interleaving and extract even/odd +-# for wide element types, 0 otherwise. +-proc check_effective_target_vect_strided_wide { } { +- global et_vect_strided_wide_saved ++# Return 1 if the target supports vectors of 64 bits. + -+ loop = LOOP_VINFO_LOOP (loop_info); - - if (!is_gimple_assign (last_stmt)) - return NULL; -@@ -612,14 +840,16 @@ - || !types_compatible_p (TREE_TYPE (oprnd1), type)) - return NULL; ++proc check_effective_target_vect64 { } { ++ global et_vect64_saved -- /* So far so good. Since last_stmt was detected as a (summation) reduction, -+ /* So far so good. Since last_stmt was detected as a (summation) reduction, - we know that oprnd1 is the reduction variable (defined by a loop-header - phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. - Left to check that oprnd0 is defined by a cast from type 'type' to type - 'TYPE'. 
*/ +- if [info exists et_vect_strided_wide_saved] { +- verbose "check_effective_target_vect_strided_wide: using cached result" 2 ++ if [info exists et_vect64_saved] { ++ verbose "check_effective_target_vect64: using cached result" 2 + } else { +- set et_vect_strided_wide_saved 0 +- if { [check_effective_target_vect_interleave] +- && [check_effective_target_vect_extract_even_odd_wide] } { +- set et_vect_strided_wide_saved 1 ++ set et_vect64_saved 0 ++ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { ++ set et_vect64_saved 1 + } + } -- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt)) -- return NULL; -+ if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt, -+ &promotion) -+ || !promotion) -+ return NULL; +- verbose "check_effective_target_vect_strided_wide: returning $et_vect_strided_wide_saved" 2 +- return $et_vect_strided_wide_saved ++ verbose "check_effective_target_vect64: returning $et_vect64_saved" 2 ++ return $et_vect64_saved + } - oprnd0 = gimple_assign_rhs1 (stmt); - *type_in = half_type; -@@ -641,9 +871,813 @@ - when doing outer-loop vectorization. */ - gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + # Return 1 if the target supports section-anchors +@@ -3302,6 +3445,31 @@ + return $et_sync_int_long_saved + } -+ VEC_safe_push (gimple, heap, *stmts, last_stmt); -+ return pattern_stmt; -+} -+ ++# Return 1 if the target supports atomic operations on "long long" and can ++# execute them ++# So far only put checks in for ARM, others may want to add their own ++proc check_effective_target_sync_longlong { } { ++ return [check_runtime sync_longlong_runtime { ++ #include ++ int main () ++ { ++ long long l1; + -+/* Return TRUE if the operation in STMT can be performed on a smaller type. ++ if (sizeof (long long) != 8) ++ exit (1); + -+ Input: -+ STMT - a statement to check. -+ DEF - we support operations with two operands, one of which is constant. 
-+ The other operand can be defined by a demotion operation, or by a -+ previous statement in a sequence of over-promoted operations. In the -+ later case DEF is used to replace that operand. (It is defined by a -+ pattern statement we created for the previous statement in the -+ sequence). ++ #ifdef __arm__ ++ /* Just check for native; checking for kernel fallback is tricky. */ ++ asm volatile ("ldrexd r0,r1, [%0]" : : "r" (&l1) : "r0", "r1"); ++ #else ++ # error "Add other suitable archs here" ++ #endif + -+ Input/output: -+ NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not -+ NULL, it's the type of DEF. -+ STMTS - additional pattern statements. If a pattern statement (type -+ conversion) is created in this function, its original statement is -+ added to STMTS. ++ exit (0); ++ } ++ } "" ] ++} + -+ Output: -+ OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new -+ operands to use in the new pattern statement for STMT (will be created -+ in vect_recog_over_widening_pattern ()). -+ NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern -+ statements for STMT: the first one is a type promotion and the second -+ one is the operation itself. We return the type promotion statement -+ in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of -+ the second pattern statement. */ + # Return 1 if the target supports atomic operations on "char" and "short". + + proc check_effective_target_sync_char_short { } { +@@ -3635,11 +3803,11 @@ + return $flags + } + +-# Add to FLAGS the flags needed to enable 128-bit vectors. ++# Add to FLAGS the flags needed to enable 64-bit vectors. 
+ +-proc add_options_for_quad_vectors { flags } { ++proc add_options_for_double_vectors { flags } { + if [is-effective-target arm_neon_ok] { +- return "$flags -mvectorize-with-neon-quad" ++ return "$flags -mvectorize-with-neon-double" + } + + return $flags +--- a/src/gcc/tree-affine.c ++++ b/src/gcc/tree-affine.c +@@ -887,3 +887,30 @@ + *size = shwi_to_double_int ((bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT); + } + ++/* Returns true if a region of size SIZE1 at position 0 and a region of ++ size SIZE2 at position DIFF cannot overlap. */ + -+static bool -+vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type, -+ tree *op0, tree *op1, gimple *new_def_stmt, -+ VEC (gimple, heap) **stmts) ++bool ++aff_comb_cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2) +{ -+ enum tree_code code; -+ tree const_oprnd, oprnd; -+ tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type; -+ gimple def_stmt, new_stmt; -+ bool first = false; -+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); -+ bool promotion; -+ bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); -+ struct loop *loop = NULL; -+ -+ if (loop_info) -+ loop = LOOP_VINFO_LOOP (loop_info); -+ -+ *new_def_stmt = NULL; ++ double_int d, bound; + -+ if (!is_gimple_assign (stmt)) ++ /* Unless the difference is a constant, we fail. */ ++ if (diff->n != 0) + return false; + -+ code = gimple_assign_rhs_code (stmt); -+ if (code != LSHIFT_EXPR && code != RSHIFT_EXPR -+ && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) -+ return false; ++ d = diff->offset; ++ if (double_int_negative_p (d)) ++ { ++ /* The second object is before the first one, we succeed if the last ++ element of the second object is before the start of the first one. 
*/ ++ bound = double_int_add (d, double_int_add (size2, double_int_minus_one)); ++ return double_int_negative_p (bound); ++ } ++ else ++ { ++ /* We succeed if the second object starts after the first one ends. */ ++ return double_int_scmp (size1, d) <= 0; ++ } ++} + -+ oprnd = gimple_assign_rhs1 (stmt); -+ const_oprnd = gimple_assign_rhs2 (stmt); -+ type = gimple_expr_type (stmt); +--- a/src/gcc/tree-affine.h ++++ b/src/gcc/tree-affine.h +@@ -76,6 +76,7 @@ + struct pointer_map_t **); + void get_inner_reference_aff (tree, aff_tree *, double_int *); + void free_affine_expand_cache (struct pointer_map_t **); ++bool aff_comb_cannot_overlap_p (aff_tree *, double_int, double_int); + + /* Debugging functions. */ + void print_aff (FILE *, aff_tree *); +--- a/src/gcc/tree.c ++++ b/src/gcc/tree.c +@@ -7321,6 +7321,15 @@ + return build_array_type_1 (elt_type, index_type, false); + } + ++/* Return a representation of ELT_TYPE[NELTS], using indices of type ++ sizetype. */ + -+ if (TREE_CODE (oprnd) != SSA_NAME -+ || TREE_CODE (const_oprnd) != INTEGER_CST) -+ return false; ++tree ++build_array_type_nelts (tree elt_type, unsigned HOST_WIDE_INT nelts) ++{ ++ return build_array_type (elt_type, build_index_type (size_int (nelts - 1))); ++} + -+ /* If we are in the middle of a sequence, we use DEF from a previous -+ statement. Otherwise, OPRND has to be a result of type promotion. */ -+ if (*new_type) + /* Recursively examines the array elements of TYPE, until a non-array + element type is found. 
*/ + +--- a/src/gcc/tree-cfg.c ++++ b/src/gcc/tree-cfg.c +@@ -3046,7 +3046,26 @@ + tree fntype; + unsigned i; + +- if (TREE_CODE (fn) != OBJ_TYPE_REF ++ if (gimple_call_internal_p (stmt)) + { -+ half_type = *new_type; -+ oprnd = def; ++ if (fn) ++ { ++ error ("gimple call has two targets"); ++ debug_generic_stmt (fn); ++ return true; ++ } + } + else + { -+ first = true; -+ if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt, -+ &promotion) -+ || !promotion -+ || !gimple_bb (def_stmt) -+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) -+ || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info) -+ && gimple_code (def_stmt) != GIMPLE_PHI) -+ || !vinfo_for_stmt (def_stmt)) -+ return false; ++ if (!fn) ++ { ++ error ("gimple call has no target"); ++ return true; ++ } + } + -+ /* Can we perform the operation on a smaller type? */ -+ switch (code) -+ { -+ case BIT_IOR_EXPR: -+ case BIT_XOR_EXPR: -+ case BIT_AND_EXPR: -+ if (!int_fits_type_p (const_oprnd, half_type)) ++ if (fn ++ && TREE_CODE (fn) != OBJ_TYPE_REF + && !is_gimple_val (fn)) + { + error ("invalid function in gimple call"); +@@ -3054,9 +3073,10 @@ + return true; + } + +- if (!POINTER_TYPE_P (TREE_TYPE (fn)) +- || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE +- && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE)) ++ if (fn ++ && (!POINTER_TYPE_P (TREE_TYPE (fn)) ++ || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE ++ && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE))) + { + error ("non-function in gimple call"); + return true; +@@ -3076,8 +3096,12 @@ + return true; + } + +- fntype = TREE_TYPE (TREE_TYPE (fn)); +- if (gimple_call_lhs (stmt) ++ if (fn) ++ fntype = TREE_TYPE (TREE_TYPE (fn)); ++ else ++ fntype = NULL_TREE; ++ if (fntype ++ && gimple_call_lhs (stmt) + && !useless_type_conversion_p (TREE_TYPE (gimple_call_lhs (stmt)), + TREE_TYPE (fntype)) + /* ??? 
At least C++ misses conversions at assignments from +@@ -3449,6 +3473,44 @@ + return false; + } + ++ case WIDEN_LSHIFT_EXPR: ++ { ++ if (!INTEGRAL_TYPE_P (lhs_type) ++ || !INTEGRAL_TYPE_P (rhs1_type) ++ || TREE_CODE (rhs2) != INTEGER_CST ++ || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))) + { -+ /* HALF_TYPE is not enough. Try a bigger type if possible. */ -+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) -+ return false; -+ -+ interm_type = build_nonstandard_integer_type ( -+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); -+ if (!int_fits_type_p (const_oprnd, interm_type)) -+ return false; ++ error ("type mismatch in widening vector shift expression"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; + } + -+ break; -+ -+ case LSHIFT_EXPR: -+ /* Try intermediate type - HALF_TYPE is not enough for sure. */ -+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) -+ return false; -+ -+ /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. -+ (e.g., if the original value was char, the shift amount is at most 8 -+ if we want to use short). 
*/ -+ if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) -+ return false; ++ return false; ++ } + -+ interm_type = build_nonstandard_integer_type ( -+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ { ++ if (TREE_CODE (rhs1_type) != VECTOR_TYPE ++ || TREE_CODE (lhs_type) != VECTOR_TYPE ++ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) ++ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type)) ++ || TREE_CODE (rhs2) != INTEGER_CST ++ || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type)) ++ > TYPE_PRECISION (TREE_TYPE (lhs_type)))) ++ { ++ error ("type mismatch in widening vector shift expression"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } + -+ if (!vect_supportable_shift (code, interm_type)) -+ return false; ++ return false; ++ } + -+ break; + case PLUS_EXPR: + case MINUS_EXPR: + { +@@ -3550,7 +3612,7 @@ + case WIDEN_MULT_EXPR: + if (TREE_CODE (lhs_type) != INTEGER_TYPE) + return true; +- return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)) ++ return ((2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)) + || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))); + + case WIDEN_SUM_EXPR: +@@ -3643,7 +3705,7 @@ + && !FIXED_POINT_TYPE_P (rhs1_type)) + || !useless_type_conversion_p (rhs1_type, rhs2_type) + || !useless_type_conversion_p (lhs_type, rhs3_type) +- || 2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type) ++ || 2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type) + || TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)) + { + error ("type mismatch in widening multiply-accumulate expression"); +@@ -4130,9 +4192,10 @@ + didn't see a function declaration before the call. 
*/ + if (is_gimple_call (stmt)) + { +- tree decl; ++ tree fn, decl; + +- if (!is_gimple_call_addr (gimple_call_fn (stmt))) ++ fn = gimple_call_fn (stmt); ++ if (fn && !is_gimple_call_addr (fn)) + { + error ("invalid function in call statement"); + return true; +@@ -7503,6 +7566,8 @@ + case GIMPLE_CALL: + if (gimple_call_lhs (g)) + break; ++ if (gimple_call_internal_p (g)) ++ break; + + /* This is a naked call, as opposed to a GIMPLE_CALL with an + LHS. All calls whose value is ignored should be +--- a/src/gcc/tree-data-ref.c ++++ b/src/gcc/tree-data-ref.c +@@ -1,5 +1,5 @@ + /* Data references and dependences detectors. +- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 ++ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Free Software Foundation, Inc. + Contributed by Sebastian Pop + +@@ -84,6 +84,7 @@ + #include "tree-scalar-evolution.h" + #include "tree-pass.h" + #include "langhooks.h" ++#include "tree-affine.h" + + static struct datadep_stats + { +@@ -721,11 +722,11 @@ + } + + /* Analyzes the behavior of the memory reference DR in the innermost loop or +- basic block that contains it. Returns true if analysis succeed or false ++ basic block that contains it. Returns true if analysis succeed or false + otherwise. 
*/ + + bool +-dr_analyze_innermost (struct data_reference *dr) ++dr_analyze_innermost (struct data_reference *dr, struct loop *nest) + { + gimple stmt = DR_STMT (dr); + struct loop *loop = loop_containing_stmt (stmt); +@@ -768,14 +769,25 @@ + } + else + base = build_fold_addr_expr (base); + -+ case RSHIFT_EXPR: -+ if (vect_supportable_shift (code, half_type)) -+ break; + if (in_loop) + { + if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, + false)) + { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "failed: evolution of base is not affine.\n"); +- return false; ++ if (nest) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "failed: evolution of base is not" ++ " affine.\n"); ++ return false; ++ } ++ else ++ { ++ base_iv.base = base; ++ base_iv.step = ssize_int (0); ++ base_iv.no_overflow = true; ++ } + } + } + else +@@ -800,10 +812,18 @@ + else if (!simple_iv (loop, loop_containing_stmt (stmt), + poffset, &offset_iv, false)) + { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "failed: evolution of offset is not" +- " affine.\n"); +- return false; ++ if (nest) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "failed: evolution of offset is not" ++ " affine.\n"); ++ return false; ++ } ++ else ++ { ++ offset_iv.base = poffset; ++ offset_iv.step = ssize_int (0); ++ } + } + } + +@@ -842,30 +862,30 @@ + tree base, off, access_fn = NULL_TREE; + basic_block before_loop = NULL; + +- if (nest) +- before_loop = block_before_loop (nest); ++ if (!nest) ++ { ++ DR_BASE_OBJECT (dr) = ref; ++ DR_ACCESS_FNS (dr) = NULL; ++ return; ++ } + -+ /* Try intermediate type - HALF_TYPE is not supported. 
*/ -+ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) -+ return false; ++ before_loop = block_before_loop (nest); + + while (handled_component_p (aref)) + { + if (TREE_CODE (aref) == ARRAY_REF) + { + op = TREE_OPERAND (aref, 1); +- if (nest) +- { +- access_fn = analyze_scalar_evolution (loop, op); +- access_fn = instantiate_scev (before_loop, loop, access_fn); +- VEC_safe_push (tree, heap, access_fns, access_fn); +- } +- ++ access_fn = analyze_scalar_evolution (loop, op); ++ access_fn = instantiate_scev (before_loop, loop, access_fn); ++ VEC_safe_push (tree, heap, access_fns, access_fn); + TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0); + } + + aref = TREE_OPERAND (aref, 0); + } + +- if (nest +- && (INDIRECT_REF_P (aref) +- || TREE_CODE (aref) == MEM_REF)) ++ if (INDIRECT_REF_P (aref) || TREE_CODE (aref) == MEM_REF) + { + op = TREE_OPERAND (aref, 0); + access_fn = analyze_scalar_evolution (loop, op); +@@ -967,7 +987,7 @@ + DR_REF (dr) = memref; + DR_IS_READ (dr) = is_read; + +- dr_analyze_innermost (dr); ++ dr_analyze_innermost (dr, nest); + dr_analyze_indices (dr, nest, loop); + dr_analyze_alias (dr); + +@@ -991,6 +1011,48 @@ + return dr; + } + ++/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical ++ expressions. */ ++static bool ++dr_equal_offsets_p1 (tree offset1, tree offset2) ++{ ++ bool res; + -+ interm_type = build_nonstandard_integer_type ( -+ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ STRIP_NOPS (offset1); ++ STRIP_NOPS (offset2); + -+ if (!vect_supportable_shift (code, interm_type)) -+ return false; ++ if (offset1 == offset2) ++ return true; + -+ break; ++ if (TREE_CODE (offset1) != TREE_CODE (offset2) ++ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) ++ return false; + -+ default: -+ gcc_unreachable (); -+ } ++ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), ++ TREE_OPERAND (offset2, 0)); + -+ /* There are four possible cases: -+ 1. 
OPRND is defined by a type promotion (in that case FIRST is TRUE, it's -+ the first statement in the sequence) -+ a. The original, HALF_TYPE, is not enough - we replace the promotion -+ from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. -+ b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original -+ promotion. -+ 2. OPRND is defined by a pattern statement we created. -+ a. Its type is not sufficient for the operation, we create a new stmt: -+ a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store -+ this statement in NEW_DEF_STMT, and it is later put in -+ STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT. -+ b. OPRND is good to use in the new statement. */ -+ if (first) -+ { -+ if (interm_type) -+ { -+ /* Replace the original type conversion HALF_TYPE->TYPE with -+ HALF_TYPE->INTERM_TYPE. */ -+ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) -+ { -+ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); -+ /* Check if the already created pattern stmt is what we need. */ -+ if (!is_gimple_assign (new_stmt) -+ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR -+ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) -+ return false; ++ if (!res || !BINARY_CLASS_P (offset1)) ++ return res; + -+ VEC_safe_push (gimple, heap, *stmts, def_stmt); -+ oprnd = gimple_assign_lhs (new_stmt); -+ } -+ else -+ { -+ /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */ -+ oprnd = gimple_assign_rhs1 (def_stmt); -+ tmp = create_tmp_reg (interm_type, NULL); -+ add_referenced_var (tmp); -+ new_oprnd = make_ssa_name (tmp, NULL); -+ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, -+ oprnd, NULL_TREE); -+ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; -+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; -+ VEC_safe_push (gimple, heap, *stmts, def_stmt); -+ oprnd = new_oprnd; -+ } -+ } -+ else -+ { -+ /* Retrieve the operand before the type promotion. 
*/ -+ oprnd = gimple_assign_rhs1 (def_stmt); -+ } -+ } -+ else -+ { -+ if (interm_type) -+ { -+ /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ -+ tmp = create_tmp_reg (interm_type, NULL); -+ add_referenced_var (tmp); -+ new_oprnd = make_ssa_name (tmp, NULL); -+ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, -+ oprnd, NULL_TREE); -+ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; -+ oprnd = new_oprnd; -+ *new_def_stmt = new_stmt; -+ } ++ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), ++ TREE_OPERAND (offset2, 1)); + -+ /* Otherwise, OPRND is already set. */ -+ } ++ return res; ++} + -+ if (interm_type) -+ *new_type = interm_type; -+ else -+ *new_type = half_type; ++/* Check if DRA and DRB have equal offsets. */ ++bool ++dr_equal_offsets_p (struct data_reference *dra, ++ struct data_reference *drb) ++{ ++ tree offset1, offset2; + -+ *op0 = oprnd; -+ *op1 = fold_convert (*new_type, const_oprnd); ++ offset1 = DR_OFFSET (dra); ++ offset2 = DR_OFFSET (drb); + -+ return true; ++ return dr_equal_offsets_p1 (offset1, offset2); +} + + /* Returns true if FNA == FNB. */ + + static bool +@@ -1240,14 +1302,33 @@ + } + + /* Returns false if we can prove that data references A and B do not alias, +- true otherwise. */ ++ true otherwise. If LOOP_NEST is false no cross-iteration aliases are ++ considered. */ + + bool +-dr_may_alias_p (const struct data_reference *a, const struct data_reference *b) ++dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, ++ bool loop_nest) + { + tree addr_a = DR_BASE_OBJECT (a); + tree addr_b = DR_BASE_OBJECT (b); + ++ /* If we are not processing a loop nest but scalar code we ++ do not need to care about possible cross-iteration dependences ++ and thus can process the full original reference. Do so, ++ similar to how loop invariant motion applies extra offset-based ++ disambiguation. 
*/ ++ if (!loop_nest) ++ { ++ aff_tree off1, off2; ++ double_int size1, size2; ++ get_inner_reference_aff (DR_REF (a), &off1, &size1); ++ get_inner_reference_aff (DR_REF (b), &off2, &size2); ++ aff_combination_scale (&off1, double_int_minus_one); ++ aff_combination_add (&off2, &off1); ++ if (aff_comb_cannot_overlap_p (&off2, size1, size2)) ++ return false; ++ } + -+/* Try to find a statement or a sequence of statements that can be performed -+ on a smaller type: + if (DR_IS_WRITE (a) && DR_IS_WRITE (b)) + return refs_output_dependent_p (addr_a, addr_b); + else if (DR_IS_READ (a) && DR_IS_WRITE (b)) +@@ -1285,7 +1366,7 @@ + } + + /* If the data references do not alias, then they are independent. */ +- if (!dr_may_alias_p (a, b)) ++ if (!dr_may_alias_p (a, b, loop_nest != NULL)) + { + DDR_ARE_DEPENDENT (res) = chrec_known; + return res; +@@ -4162,7 +4243,7 @@ + if ((stmt_code == GIMPLE_CALL + && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))) + || (stmt_code == GIMPLE_ASM +- && gimple_asm_volatile_p (stmt))) ++ && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt)))) + clobbers_memory = true; + + if (!gimple_vuse (stmt)) +@@ -4294,7 +4375,7 @@ + DATAREFS. Returns chrec_dont_know when failing to analyze a + difficult case, returns NULL_TREE otherwise. 
*/ + +-static tree ++tree + find_data_references_in_bb (struct loop *loop, basic_block bb, + VEC (data_reference_p, heap) **datarefs) + { +@@ -5143,7 +5224,7 @@ + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; + +- res = dr_analyze_innermost (dr) ++ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)) + && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)); + + free_data_ref (dr); +@@ -5183,7 +5264,7 @@ + + DR_STMT (dr) = stmt; + DR_REF (dr) = *ref->pos; +- dr_analyze_innermost (dr); ++ dr_analyze_innermost (dr, loop_containing_stmt (stmt)); + base_address = DR_BASE_ADDRESS (dr); + + if (!base_address) +--- a/src/gcc/tree-data-ref.h ++++ b/src/gcc/tree-data-ref.h +@@ -386,7 +386,7 @@ + DEF_VEC_ALLOC_O (data_ref_loc, heap); + + bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **); +-bool dr_analyze_innermost (struct data_reference *); ++bool dr_analyze_innermost (struct data_reference *, struct loop *); + extern bool compute_data_dependences_for_loop (struct loop *, bool, + VEC (loop_p, heap) **, + VEC (data_reference_p, heap) **, +@@ -426,10 +426,14 @@ + extern void compute_all_dependences (VEC (data_reference_p, heap) *, + VEC (ddr_p, heap) **, VEC (loop_p, heap) *, + bool); ++extern tree find_data_references_in_bb (struct loop *, basic_block, ++ VEC (data_reference_p, heap) **); + + extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *); + extern bool dr_may_alias_p (const struct data_reference *, +- const struct data_reference *); ++ const struct data_reference *, bool); ++extern bool dr_equal_offsets_p (struct data_reference *, ++ struct data_reference *); + + + /* Return true when the base objects of data references A and B are +--- a/src/gcc/tree.def ++++ b/src/gcc/tree.def +@@ -1092,6 +1092,19 @@ + is subtracted from t3. */ + DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3) + ++/* Widening shift left. ++ The first operand is of type t1. 
++ The second operand is the number of bits to shift by; it need not be the ++ same type as the first operand and result. ++ Note that the result is undefined if the second operand is larger ++ than or equal to the first operand's type size. ++ The type of the entire expression is t2, such that t2 is at least twice ++ the size of t1. ++ WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting) ++ the first argument from type t1 to type t2, and then shifting it ++ by the second argument. */ ++DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2) + -+ type x_t; -+ TYPE x_T, res0_T, res1_T; -+ loop: -+ S1 x_t = *p; -+ S2 x_T = (TYPE) x_t; -+ S3 res0_T = op (x_T, C0); -+ S4 res1_T = op (res0_T, C1); -+ S5 ... = () res1_T; - type demotion + /* Fused multiply-add. + All operands and the result are of the same type. No intermediate + rounding is performed after multiplying operand one with operand two +@@ -1147,6 +1160,16 @@ + DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2) + DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2) + ++/* Widening vector shift left in bits. ++ Operand 0 is a vector to be shifted with N elements of size S. ++ Operand 1 is an integer shift amount in bits. ++ The result of the operation is N elements of size 2*S. ++ VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results. ++ VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results. ++ */ ++DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2) ++DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2) + -+ where type 'TYPE' is at least double the size of type 'type', C0 and C1 are -+ constants. -+ Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either -+ be 'type' or some intermediate type. For now, we expect S5 to be a type -+ demotion operation. We also check that S3 and S4 have only one use. -+. + /* PREDICT_EXPR. Specify hint for branch prediction. 
The + PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the + outcome (0 for not taken and 1 for taken). Once the profile is guessed +--- a/src/gcc/tree-eh.c ++++ b/src/gcc/tree-eh.c +@@ -2752,7 +2752,7 @@ + || gimple_call_lhs (twos) + || gimple_call_chain (ones) + || gimple_call_chain (twos) +- || !operand_equal_p (gimple_call_fn (ones), gimple_call_fn (twos), 0) ++ || !gimple_call_same_target_p (ones, twos) + || gimple_call_num_args (ones) != gimple_call_num_args (twos)) + return false; + +--- a/src/gcc/tree.h ++++ b/src/gcc/tree.h +@@ -4197,6 +4197,7 @@ + extern tree build_index_type (tree); + extern tree build_array_type (tree, tree); + extern tree build_nonshared_array_type (tree, tree); ++extern tree build_array_type_nelts (tree, unsigned HOST_WIDE_INT); + extern tree build_function_type (tree, tree); + extern tree build_function_type_list (tree, ...); + extern tree build_function_type_skip_args (tree, bitmap); +@@ -4626,21 +4627,10 @@ + + extern VEC(tree,gc) *ctor_to_vec (tree); + +-/* Examine CTOR to discover: +- * how many scalar fields are set to nonzero values, +- and place it in *P_NZ_ELTS; +- * how many scalar fields in total are in CTOR, +- and place it in *P_ELT_COUNT. +- * if a type is a union, and the initializer from the constructor +- is not the largest element in the union, then set *p_must_clear. ++extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, ++ HOST_WIDE_INT *, bool *); + +- Return whether or not CTOR is a valid static constant initializer, the same +- as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */ +- +-extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *, +- bool *); +- +-extern HOST_WIDE_INT count_type_elements (const_tree, bool); ++extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree); + + /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0. 
*/ + +--- a/src/gcc/tree-if-conv.c ++++ b/src/gcc/tree-if-conv.c +@@ -464,8 +464,8 @@ + /* Returns true when the memory references of STMT are read or written + unconditionally. In other words, this function returns true when + for every data reference A in STMT there exist other accesses to +- the same data reference with predicates that add up (OR-up) to the +- true predicate: this ensures that the data reference A is touched ++ a data reference with the same base with predicates that add up (OR-up) to ++ the true predicate: this ensures that the data reference A is touched + (read or written) on every iteration of the if-converted loop. */ + + static bool +@@ -489,21 +489,38 @@ + continue; + + for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++) +- if (DR_STMT (b) != stmt +- && same_data_refs (a, b)) +- { +- tree cb = bb_predicate (gimple_bb (DR_STMT (b))); +- +- if (DR_RW_UNCONDITIONALLY (b) == 1 +- || is_true_predicate (cb) +- || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb), +- ca, cb))) +- { +- DR_RW_UNCONDITIONALLY (a) = 1; +- DR_RW_UNCONDITIONALLY (b) = 1; +- found = true; +- break; +- } ++ { ++ tree ref_base_a = DR_REF (a); ++ tree ref_base_b = DR_REF (b); + -+*/ -+static gimple -+vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts, -+ tree *type_in, tree *type_out) -+{ -+ gimple stmt = VEC_pop (gimple, *stmts); -+ gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL; -+ tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type; -+ imm_use_iterator imm_iter; -+ use_operand_p use_p; -+ int nuses = 0; -+ tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; -+ bool first; -+ loop_vec_info loop_vinfo; -+ struct loop *loop = NULL; -+ bb_vec_info bb_vinfo; -+ stmt_vec_info stmt_vinfo; ++ if (DR_STMT (b) == stmt) ++ continue; + -+ stmt_vinfo = vinfo_for_stmt (stmt); -+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); -+ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); -+ if (loop_vinfo) -+ loop = 
LOOP_VINFO_LOOP (loop_vinfo); ++ while (TREE_CODE (ref_base_a) == COMPONENT_REF ++ || TREE_CODE (ref_base_a) == IMAGPART_EXPR ++ || TREE_CODE (ref_base_a) == REALPART_EXPR) ++ ref_base_a = TREE_OPERAND (ref_base_a, 0); + -+ first = true; -+ while (1) -+ { -+ if (!vinfo_for_stmt (stmt) -+ || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) -+ return NULL; ++ while (TREE_CODE (ref_base_b) == COMPONENT_REF ++ || TREE_CODE (ref_base_b) == IMAGPART_EXPR ++ || TREE_CODE (ref_base_b) == REALPART_EXPR) ++ ref_base_b = TREE_OPERAND (ref_base_b, 0); + -+ new_def_stmt = NULL; -+ if (!vect_operation_fits_smaller_type (stmt, var, &new_type, -+ &op0, &op1, &new_def_stmt, -+ stmts)) -+ { -+ if (first) -+ return NULL; -+ else -+ break; -+ } ++ if (!operand_equal_p (ref_base_a, ref_base_b, 0)) ++ { ++ tree cb = bb_predicate (gimple_bb (DR_STMT (b))); + -+ /* STMT can be performed on a smaller type. Check its uses. */ -+ lhs = gimple_assign_lhs (stmt); -+ nuses = 0; -+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) -+ { -+ if (is_gimple_debug (USE_STMT (use_p))) -+ continue; -+ use_stmt = USE_STMT (use_p); -+ nuses++; -+ } ++ if (DR_RW_UNCONDITIONALLY (b) == 1 ++ || is_true_predicate (cb) ++ || is_true_predicate (ca ++ = fold_or_predicates (EXPR_LOCATION (cb), ca, cb))) ++ { ++ DR_RW_UNCONDITIONALLY (a) = 1; ++ DR_RW_UNCONDITIONALLY (b) = 1; ++ found = true; ++ break; ++ } ++ } + } + + if (!found) +--- a/src/gcc/tree-inline.c ++++ b/src/gcc/tree-inline.c +@@ -3343,6 +3343,7 @@ + case DOT_PROD_EXPR: + case WIDEN_MULT_PLUS_EXPR: + case WIDEN_MULT_MINUS_EXPR: ++ case WIDEN_LSHIFT_EXPR: + + case VEC_WIDEN_MULT_HI_EXPR: + case VEC_WIDEN_MULT_LO_EXPR: +@@ -3357,6 +3358,8 @@ + case VEC_EXTRACT_ODD_EXPR: + case VEC_INTERLEAVE_HIGH_EXPR: + case VEC_INTERLEAVE_LOW_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: + + return 1; + +@@ -3474,10 +3477,13 @@ + { + tree decl = gimple_call_fndecl (stmt); + tree addr = gimple_call_fn (stmt); +- tree funtype = TREE_TYPE (addr); 
++ tree funtype = NULL_TREE; + bool stdarg = false; + +- if (POINTER_TYPE_P (funtype)) ++ if (addr) ++ funtype = TREE_TYPE (addr); + -+ if (nuses != 1 || !is_gimple_assign (use_stmt) -+ || !gimple_bb (use_stmt) -+ || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) -+ || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo))) -+ return NULL; ++ if (funtype && POINTER_TYPE_P (funtype)) + funtype = TREE_TYPE (funtype); + + /* Do not special case builtins where we see the body. +@@ -3517,7 +3523,7 @@ + if (decl) + funtype = TREE_TYPE (decl); + +- if (!VOID_TYPE_P (TREE_TYPE (funtype))) ++ if (funtype && !VOID_TYPE_P (TREE_TYPE (funtype))) + cost += estimate_move_cost (TREE_TYPE (funtype)); + + if (funtype) +--- a/src/gcc/tree-loop-distribution.c ++++ b/src/gcc/tree-loop-distribution.c +@@ -312,7 +312,7 @@ + + DR_STMT (dr) = stmt; + DR_REF (dr) = op0; +- res = dr_analyze_innermost (dr); ++ res = dr_analyze_innermost (dr, loop_containing_stmt (stmt)); + gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); + + nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); +--- a/src/gcc/tree-predcom.c ++++ b/src/gcc/tree-predcom.c +@@ -1114,7 +1114,7 @@ + memset (&init_dr, 0, sizeof (struct data_reference)); + DR_REF (&init_dr) = init_ref; + DR_STMT (&init_dr) = phi; +- if (!dr_analyze_innermost (&init_dr)) ++ if (!dr_analyze_innermost (&init_dr, loop)) + return NULL; + + if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) +--- a/src/gcc/tree-pretty-print.c ++++ b/src/gcc/tree-pretty-print.c +@@ -1543,6 +1543,7 @@ + case RROTATE_EXPR: + case VEC_LSHIFT_EXPR: + case VEC_RSHIFT_EXPR: ++ case WIDEN_LSHIFT_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case BIT_AND_EXPR: +@@ -2213,6 +2214,22 @@ + pp_string (buffer, " > "); + break; + ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); ++ pp_string (buffer, ", 
"); ++ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); ++ pp_string (buffer, " > "); ++ break; + -+ /* Create pattern statement for STMT. */ -+ vectype = get_vectype_for_scalar_type (new_type); -+ if (!vectype) -+ return NULL; ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); ++ pp_string (buffer, ", "); ++ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); ++ pp_string (buffer, " > "); ++ break; ++ + case VEC_UNPACK_HI_EXPR: + pp_string (buffer, " VEC_UNPACK_HI_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); +@@ -2535,6 +2552,9 @@ + case RSHIFT_EXPR: + case LROTATE_EXPR: + case RROTATE_EXPR: ++ case VEC_WIDEN_LSHIFT_HI_EXPR: ++ case VEC_WIDEN_LSHIFT_LO_EXPR: ++ case WIDEN_LSHIFT_EXPR: + return 11; + + case WIDEN_SUM_EXPR: +@@ -2710,6 +2730,9 @@ + case VEC_RSHIFT_EXPR: + return "v>>"; + ++ case WIDEN_LSHIFT_EXPR: ++ return "w<<"; ++ + case POINTER_PLUS_EXPR: + return "+"; + +--- a/src/gcc/tree-ssa-ccp.c ++++ b/src/gcc/tree-ssa-ccp.c +@@ -522,10 +522,6 @@ + val = bit_value_binop (PLUS_EXPR, TREE_TYPE (expr), + TREE_OPERAND (base, 0), TREE_OPERAND (base, 1)); + else if (base +- /* ??? While function decls have DECL_ALIGN their addresses +- may encode extra information in the lower bits on some +- targets (PR47239). Simply punt for function decls for now. 
*/ +- && TREE_CODE (base) != FUNCTION_DECL + && ((align = get_object_alignment (base, BIGGEST_ALIGNMENT)) + > BITS_PER_UNIT)) + { +@@ -1279,7 +1275,10 @@ + + case GIMPLE_CALL: + { +- tree fn = valueize_op (gimple_call_fn (stmt)); ++ tree fn = gimple_call_fn (stmt); ++ if (!fn) ++ return NULL_TREE; ++ fn = valueize_op (fn); + if (TREE_CODE (fn) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL + && DECL_BUILT_IN (TREE_OPERAND (fn, 0))) +@@ -2321,6 +2320,11 @@ + return true; + } + ++ /* Internal calls provide no argument types, so the extra laxity ++ for normal calls does not apply. */ ++ if (gimple_call_internal_p (stmt)) ++ return false; ++ + /* Propagate into the call arguments. Compared to replace_uses_in + this can use the argument slot types for type verification + instead of the current argument type. We also can safely +--- a/src/gcc/tree-ssa-dom.c ++++ b/src/gcc/tree-ssa-dom.c +@@ -64,7 +64,7 @@ + struct { enum tree_code op; tree opnd; } unary; + struct { enum tree_code op; tree opnd0, opnd1; } binary; + struct { enum tree_code op; tree opnd0, opnd1, opnd2; } ternary; +- struct { tree fn; bool pure; size_t nargs; tree *args; } call; ++ struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call; + } ops; + }; + +@@ -257,7 +257,7 @@ + + expr->type = TREE_TYPE (gimple_call_lhs (stmt)); + expr->kind = EXPR_CALL; +- expr->ops.call.fn = gimple_call_fn (stmt); ++ expr->ops.call.fn_from = stmt; + + if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)) + expr->ops.call.pure = true; +@@ -421,8 +421,8 @@ + + /* If the calls are to different functions, then they + clearly cannot be equal. */ +- if (! operand_equal_p (expr0->ops.call.fn, +- expr1->ops.call.fn, 0)) ++ if (!gimple_call_same_target_p (expr0->ops.call.fn_from, ++ expr1->ops.call.fn_from)) + return false; + + if (! 
expr0->ops.call.pure) +@@ -502,9 +502,15 @@ + { + size_t i; + enum tree_code code = CALL_EXPR; ++ gimple fn_from; + + val = iterative_hash_object (code, val); +- val = iterative_hash_expr (expr->ops.call.fn, val); ++ fn_from = expr->ops.call.fn_from; ++ if (gimple_call_internal_p (fn_from)) ++ val = iterative_hash_hashval_t ++ ((hashval_t) gimple_call_internal_fn (fn_from), val); ++ else ++ val = iterative_hash_expr (gimple_call_fn (fn_from), val); + for (i = 0; i < expr->ops.call.nargs; i++) + val = iterative_hash_expr (expr->ops.call.args[i], val); + } +@@ -564,8 +570,14 @@ + { + size_t i; + size_t nargs = element->expr.ops.call.nargs; ++ gimple fn_from; + +- print_generic_expr (stream, element->expr.ops.call.fn, 0); ++ fn_from = element->expr.ops.call.fn_from; ++ if (gimple_call_internal_p (fn_from)) ++ fputs (internal_fn_name (gimple_call_internal_fn (fn_from)), ++ stream); ++ else ++ print_generic_expr (stream, gimple_call_fn (fn_from), 0); + fprintf (stream, " ("); + for (i = 0; i < nargs; i++) + { +--- a/src/gcc/tree-ssa-loop-im.c ++++ b/src/gcc/tree-ssa-loop-im.c +@@ -1834,33 +1834,6 @@ + create_vop_ref_mapping (); + } + +-/* Returns true if a region of size SIZE1 at position 0 and a region of +- size SIZE2 at position DIFF cannot overlap. */ +- +-static bool +-cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2) +-{ +- double_int d, bound; +- +- /* Unless the difference is a constant, we fail. */ +- if (diff->n != 0) +- return false; +- +- d = diff->offset; +- if (double_int_negative_p (d)) +- { +- /* The second object is before the first one, we succeed if the last +- element of the second object is before the start of the first one. */ +- bound = double_int_add (d, double_int_add (size2, double_int_minus_one)); +- return double_int_negative_p (bound); +- } +- else +- { +- /* We succeed if the second object starts after the first one ends. 
*/ +- return double_int_scmp (size1, d) <= 0; +- } +-} +- + /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in + tree_to_aff_combination_expand. */ + +@@ -1889,7 +1862,7 @@ + aff_combination_scale (&off1, double_int_minus_one); + aff_combination_add (&off2, &off1); + +- if (cannot_overlap_p (&off2, size1, size2)) ++ if (aff_comb_cannot_overlap_p (&off2, size1, size2)) + return false; + + return true; +--- a/src/gcc/tree-ssa-math-opts.c ++++ b/src/gcc/tree-ssa-math-opts.c +@@ -1266,39 +1266,67 @@ + } + }; + +-/* Return true if RHS is a suitable operand for a widening multiplication. ++/* Build a gimple assignment to cast VAL to TARGET. Insert the statement ++ prior to GSI's current position, and return the fresh SSA name. */ + -+ /* We want to collect all the statements for which we create pattern -+ statetments, except for the case when the last statement in the -+ sequence doesn't have a corresponding pattern statement. In such -+ case we associate the last pattern statement with the last statement -+ in the sequence. Therefore, we only add the original statement to -+ the list if we know that it is not the last. 
*/ -+ if (prev_stmt) -+ VEC_safe_push (gimple, heap, *stmts, prev_stmt); ++static tree ++build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, ++ tree target, tree val) ++{ ++ tree result = make_ssa_name (target, NULL); ++ gimple stmt = gimple_build_assign_with_ops (CONVERT_EXPR, result, val, NULL); ++ gimple_set_location (stmt, loc); ++ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); ++ return result; ++} + -+ var = vect_recog_temp_ssa_var (new_type, NULL); -+ pattern_stmt = gimple_build_assign_with_ops ( -+ gimple_assign_rhs_code (stmt), var, op0, op1); -+ SSA_NAME_DEF_STMT (var) = pattern_stmt; -+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; -+ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt; ++/* Return true if RHS is a suitable operand for a widening multiplication, ++ assuming a target type of TYPE. + There are two cases: + +- - RHS makes some value twice as wide. Store that value in *NEW_RHS_OUT +- if so, and store its type in *TYPE_OUT. ++ - RHS makes some value at least twice as wide. Store that value ++ in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. + + - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, + but leave *TYPE_OUT untouched. */ + + static bool +-is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out) ++is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, ++ tree *new_rhs_out) + { + gimple stmt; +- tree type, type1, rhs1; ++ tree type1, rhs1; + enum tree_code rhs_code; + + if (TREE_CODE (rhs) == SSA_NAME) + { +- type = TREE_TYPE (rhs); + stmt = SSA_NAME_DEF_STMT (rhs); +- if (!is_gimple_assign (stmt)) +- return false; ++ if (is_gimple_assign (stmt)) ++ { ++ rhs_code = gimple_assign_rhs_code (stmt); ++ if (TREE_CODE (type) == INTEGER_TYPE ++ ? 
!CONVERT_EXPR_CODE_P (rhs_code) ++ : rhs_code != FIXED_CONVERT_EXPR) ++ rhs1 = rhs; ++ else ++ { ++ rhs1 = gimple_assign_rhs1 (stmt); + +- rhs_code = gimple_assign_rhs_code (stmt); +- if (TREE_CODE (type) == INTEGER_TYPE +- ? !CONVERT_EXPR_CODE_P (rhs_code) +- : rhs_code != FIXED_CONVERT_EXPR) +- return false; ++ if (TREE_CODE (rhs1) == INTEGER_CST) ++ { ++ *new_rhs_out = rhs1; ++ *type_out = NULL; ++ return true; ++ } ++ } ++ } ++ else ++ rhs1 = rhs; + +- rhs1 = gimple_assign_rhs1 (stmt); + type1 = TREE_TYPE (rhs1); + -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ { -+ fprintf (vect_dump, "created pattern stmt: "); -+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); -+ } + if (TREE_CODE (type1) != TREE_CODE (type) +- || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type)) ++ || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) + return false; + + *new_rhs_out = rhs1; +@@ -1316,28 +1344,29 @@ + return false; + } + +-/* Return true if STMT performs a widening multiplication. If so, +- store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT +- respectively. Also fill *RHS1_OUT and *RHS2_OUT such that converting +- those operands to types *TYPE1_OUT and *TYPE2_OUT would give the +- operands of the multiplication. */ ++/* Return true if STMT performs a widening multiplication, assuming the ++ output type is TYPE. If so, store the unwidened types of the operands ++ in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and ++ *RHS2_OUT such that converting those operands to types *TYPE1_OUT ++ and *TYPE2_OUT would give the operands of the multiplication. 
*/ + + static bool + is_widening_mult_p (gimple stmt, + tree *type1_out, tree *rhs1_out, + tree *type2_out, tree *rhs2_out) + { +- tree type; ++ tree type = TREE_TYPE (gimple_assign_lhs (stmt)); + +- type = TREE_TYPE (gimple_assign_lhs (stmt)); + if (TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != FIXED_POINT_TYPE) + return false; + +- if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out)) ++ if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, ++ rhs1_out)) + return false; + +- if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out)) ++ if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, ++ rhs2_out)) + return false; + + if (*type1_out == NULL) +@@ -1354,6 +1383,18 @@ + *type2_out = *type1_out; + } + ++ /* Ensure that the larger of the two operands comes first. */ ++ if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) ++ { ++ tree tmp; ++ tmp = *type1_out; ++ *type1_out = *type2_out; ++ *type2_out = tmp; ++ tmp = *rhs1_out; ++ *rhs1_out = *rhs2_out; ++ *rhs2_out = tmp; ++ } + -+ prev_stmt = stmt; -+ stmt = use_stmt; + return true; + } + +@@ -1362,10 +1403,15 @@ + value is true iff we converted the statement. 
*/ + + static bool +-convert_mult_to_widen (gimple stmt) ++convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) + { +- tree lhs, rhs1, rhs2, type, type1, type2; ++ tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL; + enum insn_code handler; ++ enum machine_mode to_mode, from_mode, actual_mode; ++ optab op; ++ int actual_precision; ++ location_t loc = gimple_location (stmt); ++ bool from_unsigned1, from_unsigned2; + + lhs = gimple_assign_lhs (stmt); + type = TREE_TYPE (lhs); +@@ -1375,18 +1421,82 @@ + if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) + return false; + +- if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2)) +- handler = optab_handler (umul_widen_optab, TYPE_MODE (type)); +- else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2)) +- handler = optab_handler (smul_widen_optab, TYPE_MODE (type)); ++ to_mode = TYPE_MODE (type); ++ from_mode = TYPE_MODE (type1); ++ from_unsigned1 = TYPE_UNSIGNED (type1); ++ from_unsigned2 = TYPE_UNSIGNED (type2); + -+ first = false; -+ } ++ if (from_unsigned1 && from_unsigned2) ++ op = umul_widen_optab; ++ else if (!from_unsigned1 && !from_unsigned2) ++ op = smul_widen_optab; + else +- handler = optab_handler (usmul_widen_optab, TYPE_MODE (type)); ++ op = usmul_widen_optab; + -+ /* We got a sequence. We expect it to end with a type demotion operation. -+ Otherwise, we quit (for now). There are three possible cases: the -+ conversion is to NEW_TYPE (we don't do anything), the conversion is to -+ a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and -+ NEW_TYPE differs (we create a new conversion statement). */ -+ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) ++ handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, ++ 0, &actual_mode); + + if (handler == CODE_FOR_nothing) +- return false; + { -+ use_lhs = gimple_assign_lhs (use_stmt); -+ use_type = TREE_TYPE (use_lhs); -+ /* Support only type promotion or signedess change. 
*/ -+ if (!INTEGRAL_TYPE_P (use_type) -+ || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) -+ return NULL; -+ -+ if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) -+ || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) -+ { -+ /* Create NEW_TYPE->USE_TYPE conversion. */ -+ tmp = create_tmp_reg (use_type, NULL); -+ add_referenced_var (tmp); -+ new_oprnd = make_ssa_name (tmp, NULL); -+ pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, -+ var, NULL_TREE); -+ SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt; -+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; ++ if (op != smul_widen_optab) ++ { ++ /* We can use a signed multiply with unsigned types as long as ++ there is a wider mode to use, or it is the smaller of the two ++ types that is unsigned. Note that type1 >= type2, always. */ ++ if ((TYPE_UNSIGNED (type1) ++ && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) ++ || (TYPE_UNSIGNED (type2) ++ && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) ++ { ++ from_mode = GET_MODE_WIDER_MODE (from_mode); ++ if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) ++ return false; ++ } + -+ *type_in = get_vectype_for_scalar_type (new_type); -+ *type_out = get_vectype_for_scalar_type (use_type); ++ op = smul_widen_optab; ++ handler = find_widening_optab_handler_and_mode (op, to_mode, ++ from_mode, 0, ++ &actual_mode); + -+ /* We created a pattern statement for the last statement in the -+ sequence, so we don't need to associate it with the pattern -+ statement created for PREV_STMT. Therefore, we add PREV_STMT -+ to the list in order to mark it later in vect_pattern_recog_1. 
*/ -+ if (prev_stmt) -+ VEC_safe_push (gimple, heap, *stmts, prev_stmt); -+ } ++ if (handler == CODE_FOR_nothing) ++ return false; + +- gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1)); +- gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2)); ++ from_unsigned1 = from_unsigned2 = false; ++ } + else -+ { -+ if (prev_stmt) -+ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt)) -+ = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt)); -+ -+ *type_in = vectype; -+ *type_out = NULL_TREE; -+ } -+ -+ VEC_safe_push (gimple, heap, *stmts, use_stmt); ++ return false; + } -+ else -+ /* TODO: support general case, create a conversion to the correct type. */ -+ return NULL; + -+ /* Pattern detected. */ -+ if (vect_print_dump_info (REPORT_DETAILS)) ++ /* Ensure that the inputs to the handler are in the correct precison ++ for the opcode. This will be the full mode size. */ ++ actual_precision = GET_MODE_PRECISION (actual_mode); ++ if (actual_precision != TYPE_PRECISION (type1) ++ || from_unsigned1 != TYPE_UNSIGNED (type1)) + { -+ fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: "); -+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned1), ++ NULL); ++ rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1); + } -+ -+ return pattern_stmt; -+} -+ -+ -+/* Detect widening shift pattern: -+ -+ type a_t; -+ TYPE a_T, res_T; -+ -+ S1 a_t = ; -+ S2 a_T = (TYPE) a_t; -+ S3 res_T = a_T << CONST; -+ -+ where type 'TYPE' is at least double the size of type 'type'. -+ -+ Also detect unsigned cases: -+ -+ unsigned type a_t; -+ unsigned TYPE u_res_T; -+ TYPE a_T, res_T; -+ -+ S1 a_t = ; -+ S2 a_T = (TYPE) a_t; -+ S3 res_T = a_T << CONST; -+ S4 u_res_T = (unsigned TYPE) res_T; -+ -+ And a case when 'TYPE' is 4 times bigger than 'type'. 
In that case we -+ create an additional pattern stmt for S2 to create a variable of an -+ intermediate type, and perform widen-shift on the intermediate type: -+ -+ type a_t; -+ interm_type a_it; -+ TYPE a_T, res_T, res_T'; -+ -+ S1 a_t = ; -+ S2 a_T = (TYPE) a_t; -+ '--> a_it = (interm_type) a_t; -+ S3 res_T = a_T << CONST; -+ '--> res_T' = a_it <<* CONST; -+ -+ Input/Output: -+ -+ * STMTS: Contains a stmt from which the pattern search begins. -+ In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 -+ in STMTS. When an intermediate type is used and a pattern statement is -+ created for S2, we also put S2 here (before S3). -+ -+ Output: -+ -+ * TYPE_IN: The type of the input arguments to the pattern. -+ -+ * TYPE_OUT: The type of the output of this pattern. -+ -+ * Return value: A new stmt that will be used to replace the sequence of -+ stmts that constitute the pattern. In this case it will be: -+ WIDEN_LSHIFT_EXPR . */ -+ -+static gimple -+vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts, -+ tree *type_in, tree *type_out) -+{ -+ gimple last_stmt = VEC_pop (gimple, *stmts); -+ gimple def_stmt0; -+ tree oprnd0, oprnd1; -+ tree type, half_type0; -+ gimple pattern_stmt, orig_stmt = NULL; -+ tree vectype, vectype_out = NULL_TREE; -+ tree dummy; -+ tree var; -+ enum tree_code dummy_code; -+ int dummy_int; -+ VEC (tree, heap) * dummy_vec; -+ gimple use_stmt = NULL; -+ bool over_widen = false; -+ bool promotion; -+ -+ if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) -+ return NULL; -+ -+ orig_stmt = last_stmt; -+ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) ++ if (actual_precision != TYPE_PRECISION (type2) ++ || from_unsigned2 != TYPE_UNSIGNED (type2)) + { -+ /* This statement was also detected as over-widening operation (it can't -+ be any other pattern, because only over-widening detects shifts). -+ LAST_STMT is the final type demotion statement, but its related -+ statement is shift. 
We analyze the related statement to catch cases: -+ -+ orig code: -+ type a_t; -+ itype res; -+ TYPE a_T, res_T; -+ -+ S1 a_T = (TYPE) a_t; -+ S2 res_T = a_T << CONST; -+ S3 res = (itype)res_T; -+ -+ (size of type * 2 <= size of itype -+ and size of itype * 2 <= size of TYPE) -+ -+ code after over-widening pattern detection: -+ -+ S1 a_T = (TYPE) a_t; -+ --> a_it = (itype) a_t; -+ S2 res_T = a_T << CONST; -+ S3 res = (itype)res_T; <--- LAST_STMT -+ --> res = a_it << CONST; -+ -+ after widen_shift: -+ -+ S1 a_T = (TYPE) a_t; -+ --> a_it = (itype) a_t; - redundant -+ S2 res_T = a_T << CONST; -+ S3 res = (itype)res_T; -+ --> res = a_t w<< CONST; -+ -+ i.e., we replace the three statements with res = a_t w<< CONST. */ -+ last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt)); -+ over_widen = true; ++ /* Reuse the same type info, if possible. */ ++ if (!tmp || from_unsigned1 != from_unsigned2) ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned2), ++ NULL); ++ rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2); + } + -+ if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) -+ return NULL; -+ -+ oprnd0 = gimple_assign_rhs1 (last_stmt); -+ oprnd1 = gimple_assign_rhs2 (last_stmt); -+ if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) -+ return NULL; -+ -+ /* Check operand 0: it has to be defined by a type promotion. */ -+ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, -+ &promotion) -+ || !promotion) -+ return NULL; ++ /* Handle constants. */ ++ if (TREE_CODE (rhs1) == INTEGER_CST) ++ rhs1 = fold_convert (type1, rhs1); ++ if (TREE_CODE (rhs2) == INTEGER_CST) ++ rhs2 = fold_convert (type2, rhs2); + -+ /* Check operand 1: has to be positive. We check that it fits the type -+ in vect_handle_widen_op_by_const (). 
*/ -+ if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) -+ return NULL; ++ gimple_assign_set_rhs1 (stmt, rhs1); ++ gimple_assign_set_rhs2 (stmt, rhs2); + gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); + update_stmt (stmt); + return true; +@@ -1403,11 +1513,17 @@ + enum tree_code code) + { + gimple rhs1_stmt = NULL, rhs2_stmt = NULL; +- tree type, type1, type2; ++ gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt; ++ tree type, type1, type2, optype, tmp = NULL; + tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; + enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; + optab this_optab; + enum tree_code wmult_code; ++ enum insn_code handler; ++ enum machine_mode to_mode, from_mode, actual_mode; ++ location_t loc = gimple_location (stmt); ++ int actual_precision; ++ bool from_unsigned1, from_unsigned2; + + lhs = gimple_assign_lhs (stmt); + type = TREE_TYPE (lhs); +@@ -1429,8 +1545,6 @@ + if (is_gimple_assign (rhs1_stmt)) + rhs1_code = gimple_assign_rhs_code (rhs1_stmt); + } +- else +- return false; + + if (TREE_CODE (rhs2) == SSA_NAME) + { +@@ -1438,57 +1552,160 @@ + if (is_gimple_assign (rhs2_stmt)) + rhs2_code = gimple_assign_rhs_code (rhs2_stmt); + } +- else +- return false; + +- if (code == PLUS_EXPR && rhs1_code == MULT_EXPR) ++ /* Allow for one conversion statement between the multiply ++ and addition/subtraction statement. If there are more than ++ one conversions then we assume they would invalidate this ++ transformation. If that's not the case then they should have ++ been folded before now. 
*/ ++ if (CONVERT_EXPR_CODE_P (rhs1_code)) ++ { ++ conv1_stmt = rhs1_stmt; ++ rhs1 = gimple_assign_rhs1 (rhs1_stmt); ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ { ++ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); ++ if (is_gimple_assign (rhs1_stmt)) ++ rhs1_code = gimple_assign_rhs_code (rhs1_stmt); ++ } ++ else ++ return false; ++ } ++ if (CONVERT_EXPR_CODE_P (rhs2_code)) ++ { ++ conv2_stmt = rhs2_stmt; ++ rhs2 = gimple_assign_rhs1 (rhs2_stmt); ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ { ++ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); ++ if (is_gimple_assign (rhs2_stmt)) ++ rhs2_code = gimple_assign_rhs_code (rhs2_stmt); ++ } ++ else ++ return false; ++ } + -+ oprnd0 = gimple_assign_rhs1 (def_stmt0); -+ type = gimple_expr_type (last_stmt); ++ /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call ++ is_widening_mult_p, but we still need the rhs returns. + -+ /* Check if this a widening operation. */ -+ if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, -+ &oprnd0, stmts, -+ type, &half_type0, def_stmt0)) -+ return NULL; ++ It might also appear that it would be sufficient to use the existing ++ operands of the widening multiply, but that would limit the choice of ++ multiply-and-accumulate instructions. 
*/ ++ if (code == PLUS_EXPR ++ && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) + { + if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, + &type2, &mult_rhs2)) + return false; + add_rhs = rhs2; ++ conv_stmt = conv1_stmt; + } +- else if (rhs2_code == MULT_EXPR) ++ else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) + { + if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, + &type2, &mult_rhs2)) + return false; + add_rhs = rhs1; +- } +- else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR) +- { +- mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt); +- mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt); +- type1 = TREE_TYPE (mult_rhs1); +- type2 = TREE_TYPE (mult_rhs2); +- add_rhs = rhs2; +- } +- else if (rhs2_code == WIDEN_MULT_EXPR) +- { +- mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt); +- mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt); +- type1 = TREE_TYPE (mult_rhs1); +- type2 = TREE_TYPE (mult_rhs2); +- add_rhs = rhs1; ++ conv_stmt = conv2_stmt; + } + else + return false; + +- if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) +- return false; ++ to_mode = TYPE_MODE (type); ++ from_mode = TYPE_MODE (type1); ++ from_unsigned1 = TYPE_UNSIGNED (type1); ++ from_unsigned2 = TYPE_UNSIGNED (type2); + -+ /* Handle unsigned case. Look for -+ S4 u_res_T = (unsigned TYPE) res_T; -+ Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */ -+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) ++ /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ ++ if (from_unsigned1 != from_unsigned2) + { -+ tree lhs = gimple_assign_lhs (last_stmt), use_lhs; -+ imm_use_iterator imm_iter; -+ use_operand_p use_p; -+ int nuses = 0; -+ tree use_type; -+ -+ if (over_widen) -+ { -+ /* In case of over-widening pattern, S4 should be ORIG_STMT itself. -+ We check here that TYPE is the correct type for the operation, -+ i.e., it's the type of the original result. 
*/ -+ tree orig_type = gimple_expr_type (orig_stmt); -+ if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type)) -+ || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type))) -+ return NULL; -+ } -+ else -+ { -+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) -+ { -+ if (is_gimple_debug (USE_STMT (use_p))) -+ continue; -+ use_stmt = USE_STMT (use_p); -+ nuses++; -+ } -+ -+ if (nuses != 1 || !is_gimple_assign (use_stmt) -+ || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) -+ return NULL; -+ -+ use_lhs = gimple_assign_lhs (use_stmt); -+ use_type = TREE_TYPE (use_lhs); -+ -+ if (!INTEGRAL_TYPE_P (use_type) -+ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) -+ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) -+ return NULL; ++ /* We can use a signed multiply with unsigned types as long as ++ there is a wider mode to use, or it is the smaller of the two ++ types that is unsigned. Note that type1 >= type2, always. */ ++ if ((from_unsigned1 ++ && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) ++ || (from_unsigned2 ++ && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) ++ { ++ from_mode = GET_MODE_WIDER_MODE (from_mode); ++ if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) ++ return false; ++ } + -+ type = use_type; -+ } ++ from_unsigned1 = from_unsigned2 = false; + } + -+ /* Pattern detected. */ -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: "); -+ -+ /* Check target support. 
*/ -+ vectype = get_vectype_for_scalar_type (half_type0); -+ vectype_out = get_vectype_for_scalar_type (type); -+ -+ if (!vectype -+ || !vectype_out -+ || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, -+ vectype_out, vectype, -+ &dummy, &dummy, &dummy_code, -+ &dummy_code, &dummy_int, -+ &dummy_vec)) -+ return NULL; -+ -+ *type_in = vectype; -+ *type_out = vectype_out; ++ /* If there was a conversion between the multiply and addition ++ then we need to make sure it fits a multiply-and-accumulate. ++ The should be a single mode change which does not change the ++ value. */ ++ if (conv_stmt) ++ { ++ /* We use the original, unmodified data types for this. */ ++ tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); ++ tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); ++ int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); ++ bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); + -+ /* Pattern supported. Create a stmt to be used to replace the pattern. */ -+ var = vect_recog_temp_ssa_var (type, NULL); -+ pattern_stmt = -+ gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1); ++ if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) ++ { ++ /* Conversion is a truncate. */ ++ if (TYPE_PRECISION (to_type) < data_size) ++ return false; ++ } ++ else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) ++ { ++ /* Conversion is an extend. Check it's the right sort. */ ++ if (TYPE_UNSIGNED (from_type) != is_unsigned ++ && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) ++ return false; ++ } ++ /* else convert is a no-op for our purposes. */ ++ } + + /* Verify that the machine can perform a widening multiply + accumulate in this mode/signedness combination, otherwise + this transformation is likely to pessimize code. 
*/ +- this_optab = optab_for_tree_code (wmult_code, type1, optab_default); +- if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing) ++ optype = build_nonstandard_integer_type (from_mode, from_unsigned1); ++ this_optab = optab_for_tree_code (wmult_code, optype, optab_default); ++ handler = find_widening_optab_handler_and_mode (this_optab, to_mode, ++ from_mode, 0, &actual_mode); + -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ if (handler == CODE_FOR_nothing) + return false; + +- /* ??? May need some type verification here? */ ++ /* Ensure that the inputs to the handler are in the correct precison ++ for the opcode. This will be the full mode size. */ ++ actual_precision = GET_MODE_PRECISION (actual_mode); ++ if (actual_precision != TYPE_PRECISION (type1) ++ || from_unsigned1 != TYPE_UNSIGNED (type1)) ++ { ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned1), ++ NULL); ++ mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1); ++ } ++ if (actual_precision != TYPE_PRECISION (type2) ++ || from_unsigned2 != TYPE_UNSIGNED (type2)) ++ { ++ if (!tmp || from_unsigned1 != from_unsigned2) ++ tmp = create_tmp_var (build_nonstandard_integer_type ++ (actual_precision, from_unsigned2), ++ NULL); ++ mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2); ++ } + -+ if (use_stmt) -+ last_stmt = use_stmt; -+ else -+ last_stmt = orig_stmt; ++ if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) ++ add_rhs = build_and_insert_cast (gsi, loc, create_tmp_var (type, NULL), ++ add_rhs); + -+ VEC_safe_push (gimple, heap, *stmts, last_stmt); - return pattern_stmt; ++ /* Handle constants. 
*/ ++ if (TREE_CODE (mult_rhs1) == INTEGER_CST) ++ mult_rhs1 = fold_convert (type1, mult_rhs1); ++ if (TREE_CODE (mult_rhs2) == INTEGER_CST) ++ mult_rhs2 = fold_convert (type2, mult_rhs2); + +- gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, +- fold_convert (type1, mult_rhs1), +- fold_convert (type2, mult_rhs2), ++ gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2, + add_rhs); + update_stmt (gsi_stmt (*gsi)); + return true; +@@ -1696,7 +1913,7 @@ + switch (code) + { + case MULT_EXPR: +- if (!convert_mult_to_widen (stmt) ++ if (!convert_mult_to_widen (stmt, &gsi) + && convert_mult_to_fma (stmt, + gimple_assign_rhs1 (stmt), + gimple_assign_rhs2 (stmt))) +--- a/src/gcc/tree-ssa-phiopt.c ++++ b/src/gcc/tree-ssa-phiopt.c +@@ -34,6 +34,8 @@ + #include "langhooks.h" + #include "pointer-set.h" + #include "domwalk.h" ++#include "cfgloop.h" ++#include "tree-data-ref.h" + + static unsigned int tree_ssa_phiopt (void); + static unsigned int tree_ssa_phiopt_worker (bool); +@@ -1303,35 +1305,18 @@ + return true; } -+/* Function vect_recog_mixed_size_cond_pattern -+ -+ Try to find the following pattern: +-/* Do the main work of conditional store replacement. We already know +- that the recognized pattern looks like so: +- +- split: +- if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) +- THEN_BB: +- X = Y; +- goto JOIN_BB; +- ELSE_BB: +- X = Z; +- fallthrough (edge E0) +- JOIN_BB: +- some more +- +- We check that THEN_BB and ELSE_BB contain only one store +- that the stores have a "simple" RHS. */ ++/* Do the main work of conditional store replacement. 
*/ + + static bool +-cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, +- basic_block join_bb) ++cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb, ++ basic_block join_bb, gimple then_assign, ++ gimple else_assign) + { +- gimple then_assign = last_and_only_stmt (then_bb); +- gimple else_assign = last_and_only_stmt (else_bb); + tree lhs_base, lhs, then_rhs, else_rhs; + source_location then_locus, else_locus; + gimple_stmt_iterator gsi; + gimple newphi, new_stmt; + +- /* Check if then_bb and else_bb contain only one store each. */ + if (then_assign == NULL + || !gimple_assign_single_p (then_assign) + || gimple_has_volatile_ops (then_assign) +@@ -1398,6 +1383,190 @@ + return true; + } + ++/* Conditional store replacement. We already know ++ that the recognized pattern looks like so: + -+ type x_t, y_t; -+ TYPE a_T, b_T, c_T; -+ loop: -+ S1 a_T = x_t CMP y_t ? b_T : c_T; ++ split: ++ if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) ++ THEN_BB: ++ ... ++ X = Y; ++ ... ++ goto JOIN_BB; ++ ELSE_BB: ++ ... ++ X = Z; ++ ... ++ fallthrough (edge E0) ++ JOIN_BB: ++ some more + -+ where type 'TYPE' is an integral type which has different size -+ from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider -+ than 'type', the constants need to fit into an integer type -+ with the same width as 'type') or results of conversion from 'type'. ++ We check that it is safe to sink the store to JOIN_BB by verifying that ++ there are no read-after-write or write-after-write dependencies in ++ THEN_BB and ELSE_BB. 
*/ + -+ Input: ++static bool ++cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, ++ basic_block join_bb) ++{ ++ gimple then_assign = last_and_only_stmt (then_bb); ++ gimple else_assign = last_and_only_stmt (else_bb); ++ VEC (data_reference_p, heap) *then_datarefs, *else_datarefs; ++ VEC (ddr_p, heap) *then_ddrs, *else_ddrs; ++ gimple then_store, else_store; ++ bool found, ok = false, res; ++ struct data_dependence_relation *ddr; ++ data_reference_p then_dr, else_dr; ++ int i, j; ++ tree then_lhs, else_lhs; ++ VEC (gimple, heap) *then_stores, *else_stores; ++ basic_block blocks[3]; + -+ * LAST_STMT: A stmt from which the pattern search begins. ++ if (MAX_STORES_TO_SINK == 0) ++ return false; + -+ Output: ++ /* Handle the case with single statement in THEN_BB and ELSE_BB. */ ++ if (then_assign && else_assign) ++ return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, ++ then_assign, else_assign); + -+ * TYPE_IN: The type of the input arguments to the pattern. ++ /* Find data references. */ ++ then_datarefs = VEC_alloc (data_reference_p, heap, 1); ++ else_datarefs = VEC_alloc (data_reference_p, heap, 1); ++ if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs) ++ == chrec_dont_know) ++ || !VEC_length (data_reference_p, then_datarefs) ++ || (find_data_references_in_bb (NULL, else_bb, &else_datarefs) ++ == chrec_dont_know) ++ || !VEC_length (data_reference_p, else_datarefs)) ++ { ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ return false; ++ } + -+ * TYPE_OUT: The type of the output of this pattern. ++ /* Find pairs of stores with equal LHS. */ ++ then_stores = VEC_alloc (gimple, heap, 1); ++ else_stores = VEC_alloc (gimple, heap, 1); ++ FOR_EACH_VEC_ELT (data_reference_p, then_datarefs, i, then_dr) ++ { ++ if (DR_IS_READ (then_dr)) ++ continue; + -+ * Return value: A new stmt that will be used to replace the pattern. -+ Additionally a def_stmt is added. 
++ then_store = DR_STMT (then_dr); ++ then_lhs = gimple_assign_lhs (then_store); ++ found = false; + -+ a_it = x_t CMP y_t ? b_it : c_it; -+ a_T = (TYPE) a_it; */ ++ FOR_EACH_VEC_ELT (data_reference_p, else_datarefs, j, else_dr) ++ { ++ if (DR_IS_READ (else_dr)) ++ continue; + -+static gimple -+vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in, -+ tree *type_out) -+{ -+ gimple last_stmt = VEC_index (gimple, *stmts, 0); -+ tree cond_expr, then_clause, else_clause; -+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; -+ tree type, vectype, comp_vectype, comp_type, op, tmp; -+ enum machine_mode cmpmode; -+ gimple pattern_stmt, def_stmt; -+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); -+ tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE; -+ gimple def_stmt0 = NULL, def_stmt1 = NULL; -+ bool promotion; -+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); ++ else_store = DR_STMT (else_dr); ++ else_lhs = gimple_assign_lhs (else_store); + -+ if (!is_gimple_assign (last_stmt) -+ || gimple_assign_rhs_code (last_stmt) != COND_EXPR -+ || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) -+ return NULL; ++ if (operand_equal_p (then_lhs, else_lhs, 0)) ++ { ++ found = true; ++ break; ++ } ++ } + -+ op = gimple_assign_rhs1 (last_stmt); -+ cond_expr = TREE_OPERAND (op, 0); -+ then_clause = TREE_OPERAND (op, 1); -+ else_clause = TREE_OPERAND (op, 2); ++ if (!found) ++ continue; + -+ if (!COMPARISON_CLASS_P (cond_expr)) -+ return NULL; ++ VEC_safe_push (gimple, heap, then_stores, then_store); ++ VEC_safe_push (gimple, heap, else_stores, else_store); ++ } + -+ type = gimple_expr_type (last_stmt); -+ comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); -+ comp_vectype = get_vectype_for_scalar_type (comp_type); -+ if (comp_vectype == NULL_TREE) -+ return NULL; ++ /* No pairs of stores found. 
*/ ++ if (!VEC_length (gimple, then_stores) ++ || VEC_length (gimple, then_stores) > (unsigned) MAX_STORES_TO_SINK) ++ { ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } + -+ if (types_compatible_p (type, comp_type) -+ || !INTEGRAL_TYPE_P (comp_type) -+ || !INTEGRAL_TYPE_P (type)) -+ return NULL; ++ /* Compute and check data dependencies in both basic blocks. */ ++ then_ddrs = VEC_alloc (ddr_p, heap, 1); ++ else_ddrs = VEC_alloc (ddr_p, heap, 1); ++ compute_all_dependences (then_datarefs, &then_ddrs, NULL, false); ++ compute_all_dependences (else_datarefs, &else_ddrs, NULL, false); ++ blocks[0] = then_bb; ++ blocks[1] = else_bb; ++ blocks[2] = join_bb; ++ renumber_gimple_stmt_uids_in_blocks (blocks, 3); + -+ if ((TREE_CODE (then_clause) != INTEGER_CST -+ && !type_conversion_p (then_clause, last_stmt, false, &orig_type0, -+ &def_stmt0, &promotion)) -+ || (TREE_CODE (else_clause) != INTEGER_CST -+ && !type_conversion_p (else_clause, last_stmt, false, &orig_type1, -+ &def_stmt1, &promotion))) -+ return NULL; ++ /* Check that there are no read-after-write or write-after-write dependencies ++ in THEN_BB. 
*/ ++ FOR_EACH_VEC_ELT (ddr_p, then_ddrs, i, ddr) ++ { ++ struct data_reference *dra = DDR_A (ddr); ++ struct data_reference *drb = DDR_B (ddr); + -+ if (orig_type0 && orig_type1 -+ && (!types_compatible_p (orig_type0, orig_type1) -+ || !types_compatible_p (orig_type0, comp_type))) -+ return NULL; ++ if (DDR_ARE_DEPENDENT (ddr) != chrec_known ++ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) ++ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) ++ || (DR_IS_READ (drb) && DR_IS_WRITE (dra) ++ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) ++ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) ++ { ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } ++ } + -+ if (orig_type0) -+ then_clause = gimple_assign_rhs1 (def_stmt0); ++ /* Check that there are no read-after-write or write-after-write dependencies ++ in ELSE_BB. 
*/ ++ FOR_EACH_VEC_ELT (ddr_p, else_ddrs, i, ddr) ++ { ++ struct data_reference *dra = DDR_A (ddr); ++ struct data_reference *drb = DDR_B (ddr); + -+ if (orig_type1) -+ else_clause = gimple_assign_rhs1 (def_stmt1); ++ if (DDR_ARE_DEPENDENT (ddr) != chrec_known ++ && ((DR_IS_READ (dra) && DR_IS_WRITE (drb) ++ && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb))) ++ || (DR_IS_READ (drb) && DR_IS_WRITE (dra) ++ && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra))) ++ || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)))) ++ { ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); ++ return false; ++ } ++ } + -+ cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); -+ if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) -+ return NULL; ++ /* Sink stores with same LHS. */ ++ FOR_EACH_VEC_ELT (gimple, then_stores, i, then_store) ++ { ++ else_store = VEC_index (gimple, else_stores, i); ++ res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb, ++ then_store, else_store); ++ ok = ok || res; ++ } + -+ vectype = get_vectype_for_scalar_type (type); -+ if (vectype == NULL_TREE) -+ return NULL; ++ free_dependence_relations (then_ddrs); ++ free_dependence_relations (else_ddrs); ++ free_data_refs (then_datarefs); ++ free_data_refs (else_datarefs); ++ VEC_free (gimple, heap, then_stores); ++ VEC_free (gimple, heap, else_stores); + -+ if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype))) -+ return NULL; ++ return ok; ++} + -+ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode) -+ && ((TREE_CODE (then_clause) == INTEGER_CST -+ && !int_fits_type_p (then_clause, comp_type)) -+ || (TREE_CODE (else_clause) == INTEGER_CST -+ && !int_fits_type_p (else_clause, comp_type)))) -+ return NULL; + /* Always do these optimizations if we have SSA + trees to 
work on. */ + static bool +--- a/src/gcc/tree-ssa-pre.c ++++ b/src/gcc/tree-ssa-pre.c +@@ -2640,11 +2640,13 @@ + } + + /* Return true if we can value number the call in STMT. This is true +- if we have a pure or constant call. */ ++ if we have a pure or constant call to a real function. */ + + static bool + can_value_number_call (gimple stmt) + { ++ if (gimple_call_internal_p (stmt)) ++ return false; + if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) + return true; + return false; +@@ -4173,6 +4175,7 @@ + gimple_stmt_iterator gsi; + gimple stmt; + unsigned i; ++ tree fn; + + FOR_EACH_BB (b) + { +@@ -4364,9 +4367,10 @@ + /* Visit indirect calls and turn them into direct calls if + possible. */ + if (is_gimple_call (stmt) +- && TREE_CODE (gimple_call_fn (stmt)) == SSA_NAME) ++ && (fn = gimple_call_fn (stmt)) ++ && TREE_CODE (fn) == SSA_NAME) + { +- tree fn = VN_INFO (gimple_call_fn (stmt))->valnum; ++ fn = VN_INFO (fn)->valnum; + if (TREE_CODE (fn) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL) + { +--- a/src/gcc/tree-ssa-sccvn.c ++++ b/src/gcc/tree-ssa-sccvn.c +@@ -2986,7 +2986,8 @@ + /* ??? We should handle stores from calls. */ + else if (TREE_CODE (lhs) == SSA_NAME) + { +- if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) ++ if (!gimple_call_internal_p (stmt) ++ && gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST)) + changed = visit_reference_op_call (lhs, stmt); + else + changed = defs_to_varying (stmt); +--- a/src/gcc/tree-ssa-structalias.c ++++ b/src/gcc/tree-ssa-structalias.c +@@ -4330,6 +4330,7 @@ + /* Fallthru to general call handling. */; + } + if (!in_ipa_mode ++ || gimple_call_internal_p (t) + || (fndecl + && (!(fi = lookup_vi_for_tree (fndecl)) + || !fi->is_fn_info))) +--- a/src/gcc/tree-vect-data-refs.c ++++ b/src/gcc/tree-vect-data-refs.c +@@ -43,6 +43,45 @@ + #include "expr.h" + #include "optabs.h" + ++/* Return true if load- or store-lanes optab OPTAB is implemented for ++ COUNT vectors of type VECTYPE. 
NAME is the name of OPTAB. */ + -+ tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), -+ fold_convert (comp_type, then_clause), -+ fold_convert (comp_type, else_clause)); -+ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), -+ tmp); ++static bool ++vect_lanes_optab_supported_p (const char *name, convert_optab optab, ++ tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ enum machine_mode mode, array_mode; ++ bool limit_p; + -+ pattern_stmt -+ = gimple_build_assign_with_ops (NOP_EXPR, -+ vect_recog_temp_ssa_var (type, NULL), -+ gimple_assign_lhs (def_stmt), NULL_TREE); ++ mode = TYPE_MODE (vectype); ++ limit_p = !targetm.array_mode_supported_p (mode, count); ++ array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode), ++ MODE_INT, limit_p); + -+ STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; -+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); -+ set_vinfo_for_stmt (def_stmt, def_stmt_info); -+ STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; -+ *type_in = vectype; -+ *type_out = vectype; ++ if (array_mode == BLKmode) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]", ++ GET_MODE_NAME (mode), count); ++ return false; ++ } ++ ++ if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "cannot use %s<%s><%s>", ++ name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); ++ return false; ++ } + + if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: "); ++ fprintf (vect_dump, "can use %s<%s><%s>", ++ name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); + -+ return pattern_stmt; ++ return true; +} + + -+/* Mark statements that are involved in a pattern. 
*/ -+ -+static inline void -+vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, -+ tree pattern_vectype) -+{ -+ stmt_vec_info pattern_stmt_info, def_stmt_info; -+ stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); -+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); -+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info); -+ gimple def_stmt; -+ -+ set_vinfo_for_stmt (pattern_stmt, -+ new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo)); -+ gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); -+ pattern_stmt_info = vinfo_for_stmt (pattern_stmt); + /* Return the smallest scalar part of STMT. + This is used to determine the vectype of the stmt. We generally set the + vectype according to the type of the result (lhs). For stmts whose +@@ -72,6 +111,7 @@ + if (is_gimple_assign (stmt) + && (gimple_assign_cast_p (stmt) + || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR ++ || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR + || gimple_assign_rhs_code (stmt) == FLOAT_EXPR)) + { + tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); +@@ -289,39 +329,6 @@ + } + } + +- +-/* Function vect_equal_offsets. +- +- Check if OFFSET1 and OFFSET2 are identical expressions. */ +- +-static bool +-vect_equal_offsets (tree offset1, tree offset2) +-{ +- bool res; +- +- STRIP_NOPS (offset1); +- STRIP_NOPS (offset2); +- +- if (offset1 == offset2) +- return true; +- +- if (TREE_CODE (offset1) != TREE_CODE (offset2) +- || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) +- return false; +- +- res = vect_equal_offsets (TREE_OPERAND (offset1, 0), +- TREE_OPERAND (offset2, 0)); +- +- if (!res || !BINARY_CLASS_P (offset1)) +- return res; +- +- res = vect_equal_offsets (TREE_OPERAND (offset1, 1), +- TREE_OPERAND (offset2, 1)); +- +- return res; +-} +- +- + /* Check dependence between DRA and DRB for basic block vectorization. 
+ If the accesses share same bases and offsets, we can compare their initial + constant offsets to decide whether they differ or not. In case of a read- +@@ -347,12 +354,8 @@ + + /* Check that the data-refs have same bases and offsets. If not, we can't + determine if they are dependent. */ +- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) +- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR +- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR +- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) +- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) +- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))) ++ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) ++ || !dr_equal_offsets_p (dra, drb)) + return true; + + /* Check the types. */ +@@ -397,12 +400,8 @@ + + /* Check that the data-refs have same first location (except init) and they + are both either store or load (not load and store). */ +- if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) +- && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR +- || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR +- || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) +- != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) +- || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)) ++ if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0) ++ || !dr_equal_offsets_p (dra, drb) + || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) + || DR_IS_READ (dra) != DR_IS_READ (drb)) + return false; +@@ -609,6 +608,11 @@ + if (vect_check_interleaving (dra, drb)) + return false; + ++ /* Read-read is OK (we need this check here, after checking for ++ interleaving). 
*/ ++ if (DR_IS_READ (dra) && DR_IS_READ (drb)) ++ return false; + -+ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; -+ STMT_VINFO_DEF_TYPE (pattern_stmt_info) -+ = STMT_VINFO_DEF_TYPE (orig_stmt_info); -+ STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; -+ STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; -+ STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; -+ STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info) -+ = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); -+ if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) + if (vect_print_dump_info (REPORT_DR_DETAILS)) + { + fprintf (vect_dump, "can't determine dependence between "); +@@ -841,6 +845,24 @@ + } + } + ++ /* Similarly, if we're doing basic-block vectorization, we can only use ++ base and misalignment information relative to an innermost loop if the ++ misalignment stays the same throughout the execution of the loop. ++ As above, this is the case if the stride of the dataref evenly divides ++ by the vector size. 
*/ ++ if (!loop) + { -+ def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); -+ def_stmt_info = vinfo_for_stmt (def_stmt); -+ if (def_stmt_info == NULL) ++ tree step = DR_STEP (dr); ++ HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); ++ ++ if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0) + { -+ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); -+ set_vinfo_for_stmt (def_stmt, def_stmt_info); ++ if (vect_print_dump_info (REPORT_ALIGNMENT)) ++ fprintf (vect_dump, "SLP: step doesn't divide the vector-size."); ++ misalign = NULL_TREE; + } -+ gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); -+ STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; -+ STMT_VINFO_DEF_TYPE (def_stmt_info) -+ = STMT_VINFO_DEF_TYPE (orig_stmt_info); -+ if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) -+ STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; + } -+} ++ + base = build_fold_indirect_ref (base_addr); + alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT); - /* Function vect_pattern_recog_1 +@@ -1053,6 +1075,9 @@ + gimple stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + ++ if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ continue; ++ + /* For interleaving, only the alignment of the first access matters. + Skip statements marked as not vectorizable. 
*/ + if ((STMT_VINFO_STRIDED_ACCESS (stmt_info) +@@ -1171,17 +1196,11 @@ + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + int ncopies = vf / nunits; +- bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true); + +- if (!supportable_dr_alignment) +- *inside_cost = VECT_MAX_COST; ++ if (DR_IS_READ (dr)) ++ vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost); + else +- { +- if (DR_IS_READ (dr)) +- vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost); +- else +- vect_get_store_cost (dr, ncopies, inside_cost); +- } ++ vect_get_store_cost (dr, ncopies, inside_cost); -@@ -669,29 +1703,33 @@ + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, " +@@ -1250,7 +1269,9 @@ + vect_peel_info elem = (vect_peel_info) *slot; + vect_peel_extended_info max = (vect_peel_extended_info) data; - static void - vect_pattern_recog_1 ( -- gimple (* vect_recog_func) (gimple, tree *, tree *), -- gimple_stmt_iterator si) -+ gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), -+ gimple_stmt_iterator si, -+ VEC (gimple, heap) **stmts_to_replace) +- if (elem->count > max->peel_info.count) ++ if (elem->count > max->peel_info.count ++ || (elem->count == max->peel_info.count ++ && max->peel_info.npeel > elem->npeel)) + { + max->peel_info.npeel = elem->npeel; + max->peel_info.count = elem->count; +@@ -1493,7 +1514,7 @@ + stmt = DR_STMT (dr); + stmt_info = vinfo_for_stmt (stmt); + +- if (!STMT_VINFO_RELEVANT (stmt_info)) ++ if (!STMT_VINFO_RELEVANT_P (stmt_info)) + continue; + + /* For interleaving, only the alignment of the first access +@@ -2256,19 +2277,6 @@ + return false; + } + +- /* FORNOW: we handle only interleaving that is a power of 2. +- We don't fail here if it may be still possible to vectorize the +- group using SLP. 
If not, the size of the group will be checked in +- vect_analyze_operations, and the vectorization will fail. */ +- if (exact_log2 (stride) == -1) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "interleaving is not a power of 2"); +- +- if (slp_impossible) +- return false; +- } +- + if (stride == 0) + stride = count; + +@@ -2993,31 +3001,33 @@ + + /* Function vect_create_data_ref_ptr. + +- Create a new pointer to vector type (vp), that points to the first location +- accessed in the loop by STMT, along with the def-use update chain to +- appropriately advance the pointer through the loop iterations. Also set +- aliasing information for the pointer. This vector pointer is used by the +- callers to this function to create a memory reference expression for vector +- load/store access. ++ Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first ++ location accessed in the loop by STMT, along with the def-use update ++ chain to appropriately advance the pointer through the loop iterations. ++ Also set aliasing information for the pointer. This pointer is used by ++ the callers to this function to create a memory reference expression for ++ vector load/store access. + + Input: + 1. STMT: a stmt that references memory. Expected to be of the form + GIMPLE_ASSIGN or + GIMPLE_ASSIGN . +- 2. AT_LOOP: the loop where the vector memref is to be created. +- 3. OFFSET (optional): an offset to be added to the initial address accessed ++ 2. AGGR_TYPE: the type of the reference, which should be either a vector ++ or an array. ++ 3. AT_LOOP: the loop where the vector memref is to be created. ++ 4. OFFSET (optional): an offset to be added to the initial address accessed + by the data-ref in STMT. +- 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain ++ 5. ONLY_INIT: indicate if vp is to be updated in the loop, or remain + pointing to the initial address. +- 5. TYPE: if not NULL indicates the required type of the data-ref. 
++ 6. TYPE: if not NULL indicates the required type of the data-ref. + + Output: + 1. Declare a new ptr to vector_type, and have it point to the base of the + data reference (initial addressed accessed by the data reference). + For example, for vector of type V8HI, the following code is generated: + +- v8hi *vp; +- vp = (v8hi *)initial_address; ++ v8hi *ap; ++ ap = (v8hi *)initial_address; + + if OFFSET is not supplied: + initial_address = &a[init]; +@@ -3037,7 +3047,7 @@ + 4. Return the pointer. */ + + tree +-vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, ++vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop, + tree offset, tree *initial_address, gimple *ptr_incr, + bool only_init, bool *inv_p) { - gimple stmt = gsi_stmt (si), pattern_stmt; -- stmt_vec_info stmt_info = vinfo_for_stmt (stmt); -- stmt_vec_info pattern_stmt_info; -- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); -+ stmt_vec_info stmt_info; -+ loop_vec_info loop_vinfo; - tree pattern_vectype; - tree type_in, type_out; - enum tree_code code; - int i; - gimple next; +@@ -3047,17 +3057,16 @@ + struct loop *loop = NULL; + bool nested_in_vect_loop = false; + struct loop *containing_loop = NULL; +- tree vectype = STMT_VINFO_VECTYPE (stmt_info); +- tree vect_ptr_type; +- tree vect_ptr; ++ tree aggr_ptr_type; ++ tree aggr_ptr; + tree new_temp; + gimple vec_stmt; + gimple_seq new_stmt_list = NULL; + edge pe = NULL; + basic_block new_bb; +- tree vect_ptr_init; ++ tree aggr_ptr_init; + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); +- tree vptr; ++ tree aptr; + gimple_stmt_iterator incr_gsi; + bool insert_after; + bool negative; +@@ -3068,6 +3077,9 @@ + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + tree base; -- pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out); -+ VEC_truncate (gimple, *stmts_to_replace, 0); -+ VEC_quick_push (gimple, *stmts_to_replace, stmt); -+ pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, 
&type_out); - if (!pattern_stmt) - return; ++ gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE ++ || TREE_CODE (aggr_type) == VECTOR_TYPE); ++ + if (loop_vinfo) + { + loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -3102,8 +3114,9 @@ + if (vect_print_dump_info (REPORT_DETAILS)) + { + tree data_ref_base = base_name; +- fprintf (vect_dump, "create vector-pointer variable to type: "); +- print_generic_expr (vect_dump, vectype, TDF_SLIM); ++ fprintf (vect_dump, "create %s-pointer variable to type: ", ++ tree_code_name[(int) TREE_CODE (aggr_type)]); ++ print_generic_expr (vect_dump, aggr_type, TDF_SLIM); + if (TREE_CODE (data_ref_base) == VAR_DECL + || TREE_CODE (data_ref_base) == ARRAY_REF) + fprintf (vect_dump, " vectorizing an array ref: "); +@@ -3114,27 +3127,28 @@ + print_generic_expr (vect_dump, base_name, TDF_SLIM); + } -+ stmt = VEC_last (gimple, *stmts_to_replace); -+ stmt_info = vinfo_for_stmt (stmt); -+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); -+ - if (VECTOR_MODE_P (TYPE_MODE (type_in))) +- /* (1) Create the new vector-pointer variable. */ +- vect_ptr_type = build_pointer_type (vectype); ++ /* (1) Create the new aggregate-pointer variable. */ ++ aggr_ptr_type = build_pointer_type (aggr_type); + base = get_base_address (DR_REF (dr)); + if (base + && TREE_CODE (base) == MEM_REF) +- vect_ptr_type +- = build_qualified_type (vect_ptr_type, ++ aggr_ptr_type ++ = build_qualified_type (aggr_ptr_type, + TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0)))); +- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + +- /* Vector types inherit the alias set of their component type by default so +- we need to use a ref-all pointer if the data reference does not conflict +- with the created vector data reference because it is not addressable. 
*/ +- if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), ++ /* Vector and array types inherit the alias set of their component ++ type by default so we need to use a ref-all pointer if the data ++ reference does not conflict with the created aggregated data ++ reference because it is not addressable. */ ++ if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), + get_alias_set (DR_REF (dr)))) { - /* No need to check target support (already checked by the pattern - recognition function). */ -- if (type_out) -- gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out))); - pattern_vectype = type_out ? type_out : type_in; +- vect_ptr_type +- = build_pointer_type_for_mode (vectype, +- TYPE_MODE (vect_ptr_type), true); +- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr_type ++ = build_pointer_type_for_mode (aggr_type, ++ TYPE_MODE (aggr_ptr_type), true); ++ aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + } + +@@ -3145,14 +3159,14 @@ + do + { + tree lhs = gimple_assign_lhs (orig_stmt); +- if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), ++ if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr), + get_alias_set (lhs))) + { +- vect_ptr_type +- = build_pointer_type_for_mode (vectype, +- TYPE_MODE (vect_ptr_type), true); +- vect_ptr +- = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, ++ aggr_ptr_type ++ = build_pointer_type_for_mode (aggr_type, ++ TYPE_MODE (aggr_ptr_type), true); ++ aggr_ptr ++ = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, + get_name (base_name)); + break; + } +@@ -3162,7 +3176,7 @@ + while (orig_stmt); + } + +- add_referenced_var (vect_ptr); ++ add_referenced_var (aggr_ptr); + + /* Note: If the dataref is in an inner-loop nested in LOOP, and we are + vectorizing LOOP (i.e., outer-loop vectorization), we need to create two +@@ -3195,8 +3209,8 @@ + vp2 = vp1 + step + if () goto LOOP */ + +- /* (2) Calculate the initial address the 
vector-pointer, and set +- the vector-pointer to point to it before the loop. */ ++ /* (2) Calculate the initial address of the aggregate-pointer, and set ++ the aggregate-pointer to point to it before the loop. */ + + /* Create: (&(base[init_val+offset]) in the loop preheader. */ + +@@ -3215,17 +3229,17 @@ + + *initial_address = new_temp; + +- /* Create: p = (vectype *) initial_base */ ++ /* Create: p = (aggr_type *) initial_base */ + if (TREE_CODE (new_temp) != SSA_NAME +- || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp))) ++ || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp))) + { +- vec_stmt = gimple_build_assign (vect_ptr, +- fold_convert (vect_ptr_type, new_temp)); +- vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt); ++ vec_stmt = gimple_build_assign (aggr_ptr, ++ fold_convert (aggr_ptr_type, new_temp)); ++ aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt); + /* Copy the points-to information if it exists. */ + if (DR_PTR_INFO (dr)) +- duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr)); +- gimple_assign_set_lhs (vec_stmt, vect_ptr_init); ++ duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr)); ++ gimple_assign_set_lhs (vec_stmt, aggr_ptr_init); + if (pe) + { + new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt); +@@ -3235,19 +3249,19 @@ + gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT); } else -@@ -736,22 +1774,32 @@ +- vect_ptr_init = new_temp; ++ aggr_ptr_init = new_temp; + +- /* (3) Handle the updating of the vector-pointer inside the loop. ++ /* (3) Handle the updating of the aggregate-pointer inside the loop. + This is needed when ONLY_INIT is false, and also when AT_LOOP is the + inner-loop nested in LOOP (during outer-loop vectorization). */ + + /* No update in loop is required. */ + if (only_init && (!loop_vinfo || at_loop == loop)) +- vptr = vect_ptr_init; ++ aptr = aggr_ptr_init; + else + { +- /* The step of the vector pointer is the Vector Size. 
*/ +- tree step = TYPE_SIZE_UNIT (vectype); ++ /* The step of the aggregate pointer is the type size. */ ++ tree step = TYPE_SIZE_UNIT (aggr_type); + /* One exception to the above is when the scalar step of the load in + LOOP is zero. In this case the step here is also zero. */ + if (*inv_p) +@@ -3257,9 +3271,9 @@ + + standard_iv_increment_position (loop, &incr_gsi, &insert_after); + +- create_iv (vect_ptr_init, +- fold_convert (vect_ptr_type, step), +- vect_ptr, loop, &incr_gsi, insert_after, ++ create_iv (aggr_ptr_init, ++ fold_convert (aggr_ptr_type, step), ++ aggr_ptr, loop, &incr_gsi, insert_after, + &indx_before_incr, &indx_after_incr); + incr = gsi_stmt (incr_gsi); + set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); +@@ -3273,14 +3287,14 @@ + if (ptr_incr) + *ptr_incr = incr; + +- vptr = indx_before_incr; ++ aptr = indx_before_incr; } - /* Mark the stmts that are involved in the pattern. */ -- gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT); -- set_vinfo_for_stmt (pattern_stmt, -- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); -- pattern_stmt_info = vinfo_for_stmt (pattern_stmt); -- -- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; -- STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info); -- STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; -- STMT_VINFO_IN_PATTERN_P (stmt_info) = true; -- STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt; -+ vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); + if (!nested_in_vect_loop || only_init) +- return vptr; ++ return aptr; + + +- /* (4) Handle the updating of the vector-pointer inside the inner-loop ++ /* (4) Handle the updating of the aggregate-pointer inside the inner-loop + nested in LOOP, if exists. 
*/ + + gcc_assert (nested_in_vect_loop); +@@ -3288,7 +3302,7 @@ + { + standard_iv_increment_position (containing_loop, &incr_gsi, + &insert_after); +- create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr, ++ create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr, + containing_loop, &incr_gsi, insert_after, &indx_before_incr, + &indx_after_incr); + incr = gsi_stmt (incr_gsi); +@@ -3425,13 +3439,22 @@ + and FALSE otherwise. */ + + bool +-vect_strided_store_supported (tree vectype) ++vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) + { + optab interleave_high_optab, interleave_low_optab; + enum machine_mode mode; - /* Patterns cannot be vectorized using SLP, because they change the order of - computation. */ -- FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) -- if (next == stmt) -- VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); -+ if (loop_vinfo) -+ FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) -+ if (next == stmt) -+ VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); -+ -+ /* It is possible that additional pattern stmts are created and inserted in -+ STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the -+ relevant statements. */ -+ for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt) -+ && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1); -+ i++) + mode = TYPE_MODE (vectype); + ++ /* vect_permute_store_chain requires the group size to be a power of two. 
*/ ++ if (exact_log2 (count) == -1) + { -+ stmt_info = vinfo_for_stmt (stmt); -+ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (vect_print_dump_info (REPORT_DETAILS)) -+ { -+ fprintf (vect_dump, "additional pattern stmt: "); -+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); -+ } -+ -+ vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); ++ fprintf (vect_dump, "the size of the group of strided accesses" ++ " is not a power of 2"); ++ return false; + } ++ + /* Check that the operation is supported. */ + interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR, + vectype, optab_default); +@@ -3456,6 +3479,18 @@ } -@@ -761,8 +1809,8 @@ - LOOP_VINFO - a struct_loop_info of a loop in which we want to look for - computation idioms. - -- Output - for each computation idiom that is detected we insert a new stmt -- that provides the same functionality and that can be vectorized. We -+ Output - for each computation idiom that is detected we create a new stmt -+ that provides the same functionality and that can be vectorized. We - also record some information in the struct_stmt_info of the relevant - stmts, as explained below: - -@@ -777,79 +1825,113 @@ - S5: ... = ..use(a_0).. - - - - - Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be -- represented by a single stmt. We then: -- - create a new stmt S6 that will replace the pattern. -- - insert the new stmt S6 before the last stmt in the pattern -+ represented by a single stmt. We then: -+ - create a new stmt S6 equivalent to the pattern (the stmt is not -+ inserted into the code) - - fill in the STMT_VINFO fields as follows: - - in_pattern_p related_stmt vec_stmt - S1: a_i = .... - - - - S2: a_2 = ..use(a_i).. - - - - S3: a_1 = ..use(a_2).. - - - -- > S6: a_new = .... - S4 - - S4: a_0 = ..use(a_1).. true S6 - -+ '---> S6: a_new = .... - S4 - - S5: ... = ..use(a_0).. - - - ++/* Return TRUE if vec_store_lanes is available for COUNT vectors of ++ type VECTYPE. 
*/ ++ ++bool ++vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ return vect_lanes_optab_supported_p ("vec_store_lanes", ++ vec_store_lanes_optab, ++ vectype, count); ++} ++ ++ + /* Function vect_permute_store_chain. - (the last stmt in the pattern (S4) and the new pattern stmt (S6) point -- to each other through the RELATED_STMT field). -+ to each other through the RELATED_STMT field). + Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be +@@ -3517,7 +3552,7 @@ + I3: 4 12 20 28 5 13 21 30 + I4: 6 14 22 30 7 15 23 31. */ - S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead - of S4 because it will replace all its uses. Stmts {S1,S2,S3} will - remain irrelevant unless used by stmts other than S4. +-bool ++void + vect_permute_store_chain (VEC(tree,heap) *dr_chain, + unsigned int length, + gimple stmt, +@@ -3531,9 +3566,7 @@ + unsigned int j; + enum tree_code high_code, low_code; - If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} -- (because they are marked as irrelevant). It will vectorize S6, and record -- a pointer to the new vector stmt VS6 both from S6 (as usual), and also -- from S4. We do that so that when we get to vectorizing stmts that use the -- def of S4 (like S5 that uses a_0), we'll know where to take the relevant -- vector-def from. S4 will be skipped, and S5 will be vectorized as usual: -+ (because they are marked as irrelevant). It will vectorize S6, and record -+ a pointer to the new vector stmt VS6 from S6 (as usual). -+ S4 will be skipped, and S5 will be vectorized as usual: +- /* Check that the operation is supported. */ +- if (!vect_strided_store_supported (vectype)) +- return false; ++ gcc_assert (vect_strided_store_supported (vectype, length)); - in_pattern_p related_stmt vec_stmt - S1: a_i = .... - - - - S2: a_2 = ..use(a_i).. - - - - S3: a_1 = ..use(a_2).. - - - - > VS6: va_new = .... - - - -- S6: a_new = .... - S4 VS6 - S4: a_0 = ..use(a_1).. 
true S6 VS6 -+ '---> S6: a_new = .... - S4 VS6 - > VS5: ... = ..vuse(va_new).. - - - - S5: ... = ..use(a_0).. - - - + *result_chain = VEC_copy (tree, heap, dr_chain); -- DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used -+ DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used - elsewhere), and we'll end up with: +@@ -3586,7 +3619,6 @@ + } + dr_chain = VEC_copy (tree, heap, *result_chain); + } +- return true; + } - VS6: va_new = .... -- VS5: ... = ..vuse(va_new).. -+ VS5: ... = ..vuse(va_new).. + /* Function vect_setup_realignment +@@ -3756,8 +3788,9 @@ -- If vectorization does not succeed, DCE will clean S6 away (its def is -- not used), and we'll end up with the original sequence. --*/ -+ In case of more than one pattern statements, e.g., widen-mult with -+ intermediate type: -+ -+ S1 a_t = ; -+ S2 a_T = (TYPE) a_t; -+ '--> S3: a_it = (interm_type) a_t; -+ S4 prod_T = a_T * CONST; -+ '--> S5: prod_T' = a_it w* CONST; -+ -+ there may be other users of a_T outside the pattern. In that case S2 will -+ be marked as relevant (as well as S3), and both S2 and S3 will be analyzed -+ and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will -+ be recorded in S3. */ + gcc_assert (!compute_in_loop); + vec_dest = vect_create_destination_var (scalar_dest, vectype); +- ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE, +- &init_addr, &inc, true, &inv_p); ++ ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load, ++ NULL_TREE, &init_addr, &inc, ++ true, &inv_p); + new_stmt = gimple_build_assign_with_ops + (BIT_AND_EXPR, NULL_TREE, ptr, + build_int_cst (TREE_TYPE (ptr), +@@ -3862,13 +3895,22 @@ + and FALSE otherwise. 
*/ - void --vect_pattern_recog (loop_vec_info loop_vinfo) -+vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) + bool +-vect_strided_load_supported (tree vectype) ++vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count) { -- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); -- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); -- unsigned int nbbs = loop->num_nodes; -+ struct loop *loop; -+ basic_block *bbs, bb; -+ unsigned int nbbs; - gimple_stmt_iterator si; - unsigned int i, j; -- gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); -+ gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); -+ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); -+ gimple stmt; + optab perm_even_optab, perm_odd_optab; + enum machine_mode mode; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "=== vect_pattern_recog ==="); + mode = TYPE_MODE (vectype); -- /* Scan through the loop stmts, applying the pattern recognition -+ if (loop_vinfo) -+ { -+ loop = LOOP_VINFO_LOOP (loop_vinfo); -+ bbs = LOOP_VINFO_BBS (loop_vinfo); -+ nbbs = loop->num_nodes; -+ } -+ else ++ /* vect_permute_load_chain requires the group size to be a power of two. */ ++ if (exact_log2 (count) == -1) + { -+ bb = BB_VINFO_BB (bb_vinfo); -+ nbbs = 1; -+ bbs = XNEW (basic_block); -+ bbs[0] = bb; ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "the size of the group of strided accesses" ++ " is not a power of 2"); ++ return false; + } + -+ /* Scan through the stmts, applying the pattern recognition - functions starting at each stmt visited: */ - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) - { -+ if (bb_vinfo && (stmt = gsi_stmt (si)) -+ && vinfo_for_stmt (stmt) -+ && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) -+ continue; -+ - /* Scan over all generic vect_recog_xxx_pattern functions. 
*/ - for (j = 0; j < NUM_PATTERNS; j++) - { - vect_recog_func_ptr = vect_vect_recog_func_ptrs[j]; -- vect_pattern_recog_1 (vect_recog_func_ptr, si); -+ vect_pattern_recog_1 (vect_recog_func_ptr, si, -+ &stmts_to_replace); - } - } - } -+ -+ VEC_free (gimple, heap, stmts_to_replace); -+ if (bb_vinfo) -+ free (bbs); + perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype, + optab_default); + if (!perm_even_optab) +@@ -3903,6 +3945,16 @@ + return true; } ---- a/src/gcc/tree-vect-slp.c -+++ b/src/gcc/tree-vect-slp.c -@@ -67,15 +67,16 @@ - static void - vect_free_slp_tree (slp_tree node) - { -+ int i; -+ slp_void_p child; + ++/* Return TRUE if vec_load_lanes is available for COUNT vectors of ++ type VECTYPE. */ + - if (!node) - return; ++bool ++vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count) ++{ ++ return vect_lanes_optab_supported_p ("vec_load_lanes", ++ vec_load_lanes_optab, ++ vectype, count); ++} -- if (SLP_TREE_LEFT (node)) -- vect_free_slp_tree (SLP_TREE_LEFT (node)); -- -- if (SLP_TREE_RIGHT (node)) -- vect_free_slp_tree (SLP_TREE_RIGHT (node)); -+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) -+ vect_free_slp_tree ((slp_tree) child); + /* Function vect_permute_load_chain. -+ VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node)); - VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node)); +@@ -3980,7 +4032,7 @@ + 3rd vec (E2): 2 6 10 14 18 22 26 30 + 4th vec (E4): 3 7 11 15 19 23 27 31. */ - if (SLP_TREE_VEC_STMTS (node)) -@@ -96,46 +97,151 @@ +-bool ++static void + vect_permute_load_chain (VEC(tree,heap) *dr_chain, + unsigned int length, + gimple stmt, +@@ -3993,9 +4045,7 @@ + int i; + unsigned int j; + +- /* Check that the operation is supported. 
*/ +- if (!vect_strided_load_supported (vectype)) +- return false; ++ gcc_assert (vect_strided_load_supported (vectype, length)); + + *result_chain = VEC_copy (tree, heap, dr_chain); + for (i = 0; i < exact_log2 (length); i++) +@@ -4038,7 +4088,6 @@ + } + dr_chain = VEC_copy (tree, heap, *result_chain); + } +- return true; } --/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that -- they are of a legal type and that they match the defs of the first stmt of -- the SLP group (stored in FIRST_STMT_...). */ -+/* Create an SLP node for SCALAR_STMTS. */ -+ -+static slp_tree -+vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts) -+{ -+ slp_tree node; -+ gimple stmt = VEC_index (gimple, scalar_stmts, 0); -+ unsigned int nops; -+ -+ if (is_gimple_call (stmt)) -+ nops = gimple_call_num_args (stmt); -+ else if (is_gimple_assign (stmt)) -+ { -+ nops = gimple_num_ops (stmt) - 1; -+ if (gimple_assign_rhs_code (stmt) == COND_EXPR) -+ nops = 4; -+ } -+ else -+ return NULL; -+ -+ node = XNEW (struct _slp_tree); -+ SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; -+ SLP_TREE_VEC_STMTS (node) = NULL; -+ SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops); -+ SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; -+ SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; -+ -+ return node; +@@ -4049,24 +4098,32 @@ + the scalar statements. + */ + +-bool ++void + vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size, + gimple_stmt_iterator *gsi) + { +- stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +- gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info); +- gimple next_stmt, new_stmt; + VEC(tree,heap) *result_chain = NULL; +- unsigned int i, gap_count; +- tree tmp_data_ref; + + /* DR_CHAIN contains input data-refs that are a part of the interleaving. + RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted + vectors, that are ready for vector computation. */ + result_chain = VEC_alloc (tree, heap, size); +- /* Permute. 
*/ +- if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain)) +- return false; ++ vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); ++ vect_record_strided_load_vectors (stmt, result_chain); ++ VEC_free (tree, heap, result_chain); +} + ++/* RESULT_CHAIN contains the output of a group of strided loads that were ++ generated as part of the vectorization of STMT. Assign the statement ++ for each vector to the associated scalar statement. */ + -+/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each -+ operand. */ -+static VEC (slp_oprnd_info, heap) * -+vect_create_oprnd_info (int nops, int group_size) ++void ++vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain) +{ -+ int i; -+ slp_oprnd_info oprnd_info; -+ VEC (slp_oprnd_info, heap) *oprnds_info; -+ -+ oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops); -+ for (i = 0; i < nops; i++) -+ { -+ oprnd_info = XNEW (struct _slp_oprnd_info); -+ oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size); -+ oprnd_info->first_dt = vect_uninitialized_def; -+ oprnd_info->first_def_type = NULL_TREE; -+ oprnd_info->first_const_oprnd = NULL_TREE; -+ oprnd_info->first_pattern = false; -+ VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info); -+ } -+ -+ return oprnds_info; -+} -+ ++ gimple first_stmt = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); ++ gimple next_stmt, new_stmt; ++ unsigned int i, gap_count; ++ tree tmp_data_ref; + + /* Put a permuted data-ref in the VECTORIZED_STMT field. + Since we scan the chain starting from it's first node, their order +@@ -4128,9 +4185,6 @@ + break; + } + } +- +- VEC_free (tree, heap, result_chain); +- return true; + } + + /* Function vect_force_dr_alignment_p. 
+--- a/src/gcc/tree-vect-generic.c ++++ b/src/gcc/tree-vect-generic.c +@@ -552,7 +552,9 @@ + || code == VEC_UNPACK_LO_EXPR + || code == VEC_PACK_TRUNC_EXPR + || code == VEC_PACK_SAT_EXPR +- || code == VEC_PACK_FIX_TRUNC_EXPR) ++ || code == VEC_PACK_FIX_TRUNC_EXPR ++ || code == VEC_WIDEN_LSHIFT_HI_EXPR ++ || code == VEC_WIDEN_LSHIFT_LO_EXPR) + type = TREE_TYPE (rhs1); + + /* Optabs will try converting a negation into a subtraction, so +--- a/src/gcc/tree-vect-loop.c ++++ b/src/gcc/tree-vect-loop.c +@@ -181,6 +181,8 @@ + stmt_vec_info stmt_info; + int i; + HOST_WIDE_INT dummy; ++ gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL; ++ bool analyze_pattern_stmt = false, pattern_def = false; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); +@@ -241,12 +243,20 @@ + } + } + +- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) ++ for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;) + { +- tree vf_vectype; +- gimple stmt = gsi_stmt (si); +- stmt_info = vinfo_for_stmt (stmt); ++ tree vf_vectype; + ++ if (analyze_pattern_stmt) ++ { ++ stmt = pattern_stmt; ++ analyze_pattern_stmt = false; ++ } ++ else ++ stmt = gsi_stmt (si); + -+/* Free operands info. 
*/ ++ stmt_info = vinfo_for_stmt (stmt); ++ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "==> examining statement: "); +@@ -259,11 +269,57 @@ + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "skip."); +- continue; ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "skip."); ++ gsi_next (&si); ++ continue; ++ } + } + ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ analyze_pattern_stmt = true; + -+static void -+vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info) -+{ -+ int i; -+ slp_oprnd_info oprnd_info; ++ /* If a pattern statement has a def stmt, analyze it too. 
*/ ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ if (pattern_def) ++ pattern_def = false; ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern def stmt: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, ++ TDF_SLIM); ++ } + -+ FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info) -+ { -+ VEC_free (gimple, heap, oprnd_info->def_stmts); -+ XDELETE (oprnd_info); -+ } ++ pattern_def = true; ++ stmt = pattern_def_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } ++ } + -+ VEC_free (slp_oprnd_info, heap, *oprnds_info); -+} + if (gimple_get_lhs (stmt) == NULL_TREE) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +@@ -295,9 +351,7 @@ + } + else + { +- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) +- && !is_pattern_stmt_p (stmt_info)); +- ++ gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); + scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); + if (vect_print_dump_info (REPORT_DETAILS)) + { +@@ -369,6 +423,9 @@ + if (!vectorization_factor + || (nunits > vectorization_factor)) + vectorization_factor = nunits; + ++ if (!analyze_pattern_stmt && !pattern_def) ++ gsi_next (&si); + } + } + +@@ -817,25 +874,17 @@ + + if (stmt_info) + { +- /* Check if this is a "pattern stmt" (introduced by the +- vectorizer during the pattern recognition pass). */ +- bool remove_stmt_p = false; +- gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); +- if (orig_stmt) +- { +- stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); +- if (orig_stmt_info +- && STMT_VINFO_IN_PATTERN_P (orig_stmt_info)) +- remove_stmt_p = true; +- } ++ /* Check if this statement has a related "pattern stmt" ++ (introduced by the vectorizer during the pattern recognition ++ pass). Free pattern's stmt_vec_info. 
*/ ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info))) ++ free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info)); + + /* Free stmt_vec_info. */ + free_stmt_vec_info (stmt); +- +- /* Remove dead "pattern stmts". */ +- if (remove_stmt_p) +- gsi_remove (&si, true); + } + -+/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that -+ they are of a valid type and that they match the defs of the first stmt of -+ the SLP group (stored in OPRNDS_INFO). */ + gsi_next (&si); + } + } +@@ -1409,7 +1458,7 @@ - static bool - vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, - slp_tree slp_node, gimple stmt, -- VEC (gimple, heap) **def_stmts0, -- VEC (gimple, heap) **def_stmts1, -- enum vect_def_type *first_stmt_dt0, -- enum vect_def_type *first_stmt_dt1, -- tree *first_stmt_def0_type, -- tree *first_stmt_def1_type, -- tree *first_stmt_const_oprnd, -- int ncopies_for_cost, -- bool *pattern0, bool *pattern1) -+ int ncopies_for_cost, bool first, -+ VEC (slp_oprnd_info, heap) **oprnds_info) - { - tree oprnd; - unsigned int i, number_of_oprnds; -- tree def; -+ tree def, def_op0 = NULL_TREE; - gimple def_stmt; -- enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; -- stmt_vec_info stmt_info = -- vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); -- enum gimple_rhs_class rhs_class; -+ enum vect_def_type dt = vect_uninitialized_def; -+ enum vect_def_type dt_op0 = vect_uninitialized_def; -+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt); -+ tree lhs = gimple_get_lhs (stmt); - struct loop *loop = NULL; -+ enum tree_code rhs_code; -+ bool different_types = false; -+ bool pattern = false; -+ slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; -+ int op_idx = 1; -+ tree compare_rhs = NULL_TREE, rhs = NULL_TREE; -+ int cond_idx = -1; + vect_analyze_scalar_cycles (loop_vinfo); - if (loop_vinfo) - loop = LOOP_VINFO_LOOP (loop_vinfo); +- 
vect_pattern_recog (loop_vinfo); ++ vect_pattern_recog (loop_vinfo, NULL); -- rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt)); -- number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */ -+ if (is_gimple_call (stmt)) -+ number_of_oprnds = gimple_call_num_args (stmt); -+ else if (is_gimple_assign (stmt)) -+ { -+ number_of_oprnds = gimple_num_ops (stmt) - 1; -+ if (gimple_assign_rhs_code (stmt) == COND_EXPR) -+ { -+ number_of_oprnds = 4; -+ cond_idx = 0; -+ rhs = gimple_assign_rhs1 (stmt); -+ } -+ } -+ else + /* Data-flow analysis to detect stmts that do not need to be vectorized. */ + +@@ -3242,8 +3291,8 @@ + + /* Get the loop-entry arguments. */ + if (slp_node) +- vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs, +- NULL, reduc_index); ++ vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs, ++ NULL, slp_node, reduc_index); + else + { + vec_initial_defs = VEC_alloc (tree, heap, 1); +@@ -3968,7 +4017,7 @@ + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL; + VEC (gimple, heap) *phis = NULL; + int vec_num; +- tree def0, def1, tem; ++ tree def0, def1, tem, op0, op1 = NULL_TREE; + + if (nested_in_vect_loop_p (loop, stmt)) + { +@@ -4047,6 +4096,9 @@ + gcc_unreachable (); + } + ++ if (code == COND_EXPR && slp_node) + return false; ++ + scalar_dest = gimple_assign_lhs (stmt); + scalar_type = TREE_TYPE (scalar_dest); + if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type) +@@ -4121,7 +4173,7 @@ - for (i = 0; i < number_of_oprnds; i++) + if (code == COND_EXPR) { -- oprnd = gimple_op (stmt, i + 1); -+ if (compare_rhs) -+ oprnd = compare_rhs; +- if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0)) ++ if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported condition in reduction"); +@@ -4276,6 +4328,25 @@ + return false; + } + ++ /* In case of widenning 
multiplication by a constant, we update the type ++ of the constant to be the type of the other operand. We check that the ++ constant fits the type in the pattern recognition pass. */ ++ if (code == DOT_PROD_EXPR ++ && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1]))) ++ { ++ if (TREE_CODE (ops[0]) == INTEGER_CST) ++ ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]); ++ else if (TREE_CODE (ops[1]) == INTEGER_CST) ++ ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]); + else -+ oprnd = gimple_op (stmt, op_idx++); ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "invalid types in dot-prod"); + -+ oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); ++ return false; ++ } ++ } + -+ if (-1 < cond_idx && cond_idx < 4) -+ { -+ if (compare_rhs) -+ compare_rhs = NULL_TREE; -+ else -+ oprnd = TREE_OPERAND (rhs, cond_idx++); -+ } -+ -+ if (COMPARISON_CLASS_P (oprnd)) -+ { -+ compare_rhs = TREE_OPERAND (oprnd, 1); -+ oprnd = TREE_OPERAND (oprnd, 0); -+ } - - if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, -- &dt[i]) -- || (!def_stmt && dt[i] != vect_constant_def)) -+ &dt) -+ || (!def_stmt && dt != vect_constant_def)) - { - if (vect_print_dump_info (REPORT_SLP)) - { -@@ -149,34 +255,32 @@ - /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt - from the pattern. Check that all the stmts of the node are in the - pattern. 
*/ -- if (loop && def_stmt && gimple_bb (def_stmt) -- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) -+ if (def_stmt && gimple_bb (def_stmt) -+ && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) -+ || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo) -+ && gimple_code (def_stmt) != GIMPLE_PHI)) - && vinfo_for_stmt (def_stmt) -- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))) -+ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) -+ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) -+ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; +@@ -4374,7 +4445,7 @@ + gcc_assert (!slp_node); + vectorizable_condition (stmt, gsi, vec_stmt, + PHI_RESULT (VEC_index (gimple, phis, 0)), +- reduc_index); ++ reduc_index, NULL); + /* Multiple types are not supported for condition. */ + break; + } +@@ -4382,8 +4453,6 @@ + /* Handle uses. */ + if (j == 0) { -- if (!*first_stmt_dt0) -- *pattern0 = true; -- else -- { -- if (i == 1 && !*first_stmt_dt1) -- *pattern1 = true; -- else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1)) -- { -- if (vect_print_dump_info (REPORT_DETAILS)) -- { -- fprintf (vect_dump, "Build SLP failed: some of the stmts" -- " are in a pattern, and others are not "); -- print_generic_expr (vect_dump, oprnd, TDF_SLIM); -- } -+ pattern = true; -+ if (!first && !oprnd_info->first_pattern) -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ { -+ fprintf (vect_dump, "Build SLP failed: some of the stmts" -+ " are in a pattern, and others are not "); -+ print_generic_expr (vect_dump, oprnd, TDF_SLIM); -+ } - -- return false; -- } -+ return false; +- tree op0, op1 = NULL_TREE; +- + op0 = ops[!reduc_index]; + if (op_type == ternary_op) + { +@@ -4394,8 +4463,8 @@ } - def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); -- dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); -+ dt = STMT_VINFO_DEF_TYPE 
(vinfo_for_stmt (def_stmt)); - -- if (*dt == vect_unknown_def_type) -+ if (dt == vect_unknown_def_type) + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1, +- -1); ++ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, ++ slp_node, -1); + else { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "Unsupported pattern."); -@@ -200,85 +304,125 @@ - } - } + loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index], +@@ -4413,11 +4482,19 @@ + { + if (!slp_node) + { +- enum vect_def_type dt = vect_unknown_def_type; /* Dummy */ +- loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0); ++ enum vect_def_type dt; ++ gimple dummy_stmt; ++ tree dummy; ++ ++ vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL, ++ &dummy_stmt, &dummy, &dt); ++ loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, ++ loop_vec_def0); + VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0); + if (op_type == ternary_op) + { ++ vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt, ++ &dummy, &dt); + loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, + loop_vec_def1); + VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1); +@@ -4722,6 +4799,8 @@ + tree cond_expr = NULL_TREE; + gimple_seq cond_expr_stmt_list = NULL; + bool do_peeling_for_loop_bound; ++ gimple stmt, pattern_stmt, pattern_def_stmt; ++ bool transform_pattern_stmt = false, pattern_def = false; -- if (!*first_stmt_dt0) -+ if (first) + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vec_transform_loop ==="); +@@ -4809,11 +4888,19 @@ + } + } + +- for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ pattern_stmt = NULL; ++ for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;) { -- /* op0 of the first stmt of the group - store its info. 
*/ -- *first_stmt_dt0 = dt[i]; -- if (def) -- *first_stmt_def0_type = TREE_TYPE (def); -- else -- *first_stmt_const_oprnd = oprnd; -+ oprnd_info->first_dt = dt; -+ oprnd_info->first_pattern = pattern; -+ if (def) +- gimple stmt = gsi_stmt (si); + bool is_store; + ++ if (transform_pattern_stmt) + { -+ oprnd_info->first_def_type = TREE_TYPE (def); -+ oprnd_info->first_const_oprnd = NULL_TREE; ++ stmt = pattern_stmt; ++ transform_pattern_stmt = false; + } + else ++ stmt = gsi_stmt (si); ++ + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "------>vectorizing statement: "); +@@ -4836,14 +4923,54 @@ + + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) +- { +- gsi_next (&si); +- continue; + { -+ oprnd_info->first_def_type = NULL_TREE; -+ oprnd_info->first_const_oprnd = oprnd; ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } ++ else ++ { ++ gsi_next (&si); ++ continue; ++ } + } ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ transform_pattern_stmt = true; ++ ++ /* If pattern statement has a def stmt, vectorize it too. 
*/ ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ if (pattern_def) ++ pattern_def = false; ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> vectorizing pattern def" ++ " stmt: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, ++ TDF_SLIM); ++ } ++ ++ pattern_def = true; ++ stmt = pattern_def_stmt; ++ stmt_info = vinfo_for_stmt (stmt); ++ } + } -- /* Analyze costs (for the first stmt of the group only). */ -- if (rhs_class != GIMPLE_SINGLE_RHS) -- /* Not memory operation (we don't call this functions for loads). */ -- vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); -- else -- /* Store. */ -- vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node); -+ if (i == 0) -+ { -+ def_op0 = def; -+ dt_op0 = dt; -+ /* Analyze costs (for the first stmt of the group only). */ -+ if (REFERENCE_CLASS_P (lhs)) -+ /* Store. */ -+ vect_model_store_cost (stmt_info, ncopies_for_cost, false, -+ dt, slp_node); -+ else -+ /* Not memory operation (we don't call this function for -+ loads). */ -+ vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt, -+ slp_node); -+ } - } + gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); +- nunits = +- (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); ++ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( ++ STMT_VINFO_VECTYPE (stmt_info)); + if (!STMT_SLP_TYPE (stmt_info) + && nunits != (unsigned int) vectorization_factor + && vect_print_dump_info (REPORT_DETAILS)) +@@ -4868,8 +4995,9 @@ + /* Hybrid SLP stmts must be vectorized in addition to SLP. */ + if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) + { +- gsi_next (&si); +- continue; ++ if (!transform_pattern_stmt && !pattern_def) ++ gsi_next (&si); ++ continue; + } + } + +@@ -4888,7 +5016,7 @@ + the chain. 
*/ + vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); + gsi_remove (&si, true); +- continue; ++ continue; + } + else + { +@@ -4898,7 +5026,9 @@ + continue; + } + } +- gsi_next (&si); ++ ++ if (!transform_pattern_stmt && !pattern_def) ++ gsi_next (&si); + } /* stmts in BB */ + } /* BBs in loop */ + +--- a/src/gcc/tree-vect-loop-manip.c ++++ b/src/gcc/tree-vect-loop-manip.c +@@ -1105,35 +1105,6 @@ + first_niters = PHI_RESULT (newphi); + } + +- +-/* Remove dead assignments from loop NEW_LOOP. */ +- +-static void +-remove_dead_stmts_from_loop (struct loop *new_loop) +-{ +- basic_block *bbs = get_loop_body (new_loop); +- unsigned i; +- for (i = 0; i < new_loop->num_nodes; ++i) +- { +- gimple_stmt_iterator gsi; +- for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);) +- { +- gimple stmt = gsi_stmt (gsi); +- if (is_gimple_assign (stmt) +- && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME +- && has_zero_uses (gimple_assign_lhs (stmt))) +- { +- gsi_remove (&gsi, true); +- release_defs (stmt); +- } +- else +- gsi_next (&gsi); +- } +- } +- free (bbs); +-} +- +- + /* Function slpeel_tree_peel_loop_to_edge. + + Peel the first (last) iterations of LOOP into a new prolog (epilog) loop +@@ -1445,13 +1416,6 @@ + BITMAP_FREE (definitions); + delete_update_ssa (); + +- /* Remove all pattern statements from the loop copy. They will confuse +- the expander if DCE is disabled. +- ??? The pattern recognizer should be split into an analysis and +- a transformation phase that is then run only on the loop that is +- going to be transformed. 
*/ +- remove_dead_stmts_from_loop (new_loop); +- + adjust_vec_debug_stmts (); + + return new_loop; +--- a/src/gcc/tree-vectorizer.h ++++ b/src/gcc/tree-vectorizer.h +@@ -73,15 +73,15 @@ + /************************************************************************ + SLP + ************************************************************************/ ++typedef void *slp_void_p; ++DEF_VEC_P (slp_void_p); ++DEF_VEC_ALLOC_P (slp_void_p, heap); + +-/* A computation tree of an SLP instance. Each node corresponds to a group of ++/* A computation tree of an SLP instance. Each node corresponds to a group of + stmts to be packed in a SIMD stmt. */ + typedef struct _slp_tree { +- /* Only binary and unary operations are supported. LEFT child corresponds to +- the first operand and RIGHT child to the second if the operation is +- binary. */ +- struct _slp_tree *left; +- struct _slp_tree *right; ++ /* Nodes that contain def-stmts of this node statements operands. */ ++ VEC (slp_void_p, heap) *children; + /* A group of scalar stmts to be vectorized together. */ + VEC (gimple, heap) *stmts; + /* Vectorized stmt/s. */ +@@ -146,14 +146,32 @@ + #define SLP_INSTANCE_LOADS(S) (S)->loads + #define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load + +-#define SLP_TREE_LEFT(S) (S)->left +-#define SLP_TREE_RIGHT(S) (S)->right ++#define SLP_TREE_CHILDREN(S) (S)->children + #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts + #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts + #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size + #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop + #define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop + ++/* This structure is used in creation of an SLP tree. Each instance ++ corresponds to the same operand in a group of scalar stmts in an SLP ++ node. */ ++typedef struct _slp_oprnd_info ++{ ++ /* Def-stmts for the operands. 
*/ ++ VEC (gimple, heap) *def_stmts; ++ /* Information about the first statement, its vector def-type, type, the ++ operand itself in case it's constant, and an indication if it's a pattern ++ stmt. */ ++ enum vect_def_type first_dt; ++ tree first_def_type; ++ tree first_const_oprnd; ++ bool first_pattern; ++} *slp_oprnd_info; ++ ++DEF_VEC_P(slp_oprnd_info); ++DEF_VEC_ALLOC_P(slp_oprnd_info, heap); ++ + + typedef struct _vect_peel_info + { +@@ -464,6 +482,9 @@ + pattern). */ + gimple related_stmt; - else - { -- if (!*first_stmt_dt1 && i == 1) -+ /* Not first stmt of the group, check that the def-stmt/s match -+ the def-stmt/s of the first stmt. Allow different definition -+ types for reduction chains: the first stmt must be a -+ vect_reduction_def (a phi node), and the rest -+ vect_internal_def. */ -+ if (((oprnd_info->first_dt != dt -+ && !(oprnd_info->first_dt == vect_reduction_def -+ && dt == vect_internal_def)) -+ || (oprnd_info->first_def_type != NULL_TREE -+ && def -+ && !types_compatible_p (oprnd_info->first_def_type, -+ TREE_TYPE (def)))) -+ || (!def -+ && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd), -+ TREE_TYPE (oprnd))) -+ || different_types) - { -- /* op1 of the first stmt of the group - store its info. */ -- *first_stmt_dt1 = dt[i]; -- if (def) -- *first_stmt_def1_type = TREE_TYPE (def); -- else -+ if (number_of_oprnds != 2) - { -- /* We assume that the stmt contains only one constant -- operand. We fail otherwise, to be on the safe side. */ -- if (*first_stmt_const_oprnd) -- { -- if (vect_print_dump_info (REPORT_SLP)) -- fprintf (vect_dump, "Build SLP failed: two constant " -- "oprnds in stmt"); -- return false; -- } -- *first_stmt_const_oprnd = oprnd; -- } -- } -- else -- { -- /* Not first stmt of the group, check that the def-stmt/s match -- the def-stmt/s of the first stmt. 
*/ -- if ((i == 0 -- && (*first_stmt_dt0 != dt[i] -- || (*first_stmt_def0_type && def -- && !types_compatible_p (*first_stmt_def0_type, -- TREE_TYPE (def))))) -- || (i == 1 -- && (*first_stmt_dt1 != dt[i] -- || (*first_stmt_def1_type && def -- && !types_compatible_p (*first_stmt_def1_type, -- TREE_TYPE (def))))) -- || (!def -- && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), -- TREE_TYPE (oprnd)))) -+ if (vect_print_dump_info (REPORT_SLP)) -+ fprintf (vect_dump, "Build SLP failed: different types "); ++ /* Used to keep a def stmt of a pattern stmt if such exists. */ ++ gimple pattern_def_stmt; + -+ return false; -+ } + /* List of datarefs that are known to have the same alignment as the dataref + of this stmt. */ + VEC(dr_p,heap) *same_align_refs; +@@ -531,6 +552,7 @@ + + #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p + #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt ++#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt + #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs + #define STMT_VINFO_DEF_TYPE(S) (S)->def_type + #define STMT_VINFO_DR_GROUP_FIRST_DR(S) (S)->first_dr +@@ -794,9 +816,9 @@ + extern tree vectorizable_function (gimple, tree, tree); + extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *, + slp_tree); +-extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type, +- slp_tree); +-extern void vect_model_load_cost (stmt_vec_info, int, slp_tree); ++extern void vect_model_store_cost (stmt_vec_info, int, bool, ++ enum vect_def_type, slp_tree); ++extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree); + extern void vect_finish_stmt_generation (gimple, gimple, + gimple_stmt_iterator *); + extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info); +@@ -810,10 +832,13 @@ + extern void vect_remove_stores (gimple); + extern bool vect_analyze_stmt (gimple, bool *, slp_tree); + extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, +- tree, 
int); ++ tree, int, slp_tree); + extern void vect_get_load_cost (struct data_reference *, int, bool, + unsigned int *, unsigned int *); + extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); ++extern bool vect_supportable_shift (enum tree_code, tree); ++extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **, ++ VEC (tree, heap) **, slp_tree, int); + + /* In tree-vect-data-refs.c. */ + extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); +@@ -829,21 +854,22 @@ + extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info); + extern bool vect_prune_runtime_alias_test_list (loop_vec_info); + extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *); +-extern tree vect_create_data_ref_ptr (gimple, struct loop *, tree, tree *, +- gimple *, bool, bool *); ++extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree, ++ tree *, gimple *, bool, bool *); + extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree); + extern tree vect_create_destination_var (tree, tree); +-extern bool vect_strided_store_supported (tree); +-extern bool vect_strided_load_supported (tree); +-extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, ++extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT); ++extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); ++extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, + gimple_stmt_iterator *, VEC(tree,heap) **); + extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *, + enum dr_alignment_support, tree, + struct loop **); +-extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple, +- gimple_stmt_iterator *, VEC(tree,heap) **); +-extern bool vect_transform_strided_load 
(gimple, VEC(tree,heap) *, int, ++extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int, + gimple_stmt_iterator *); ++extern void vect_record_strided_load_vectors (gimple, VEC(tree,heap) *); + extern int vect_get_place_in_interleaving_chain (gimple, gimple); + extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); + extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, +@@ -879,8 +905,9 @@ + extern bool vect_analyze_slp (loop_vec_info, bb_vec_info); + extern void vect_make_slp_decision (loop_vec_info); + extern void vect_detect_hybrid_slp (loop_vec_info); +-extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **, +- VEC (tree,heap) **, int); ++extern void vect_get_slp_defs (VEC (tree, heap) *, slp_tree, ++ VEC (slp_void_p, heap) **, int); + -+ /* Try to swap operands in case of binary operation. */ -+ if (i == 0) -+ different_types = true; -+ else - { -- if (vect_print_dump_info (REPORT_SLP)) -- fprintf (vect_dump, "Build SLP failed: different types "); -+ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); -+ if (is_gimple_assign (stmt) -+ && (rhs_code = gimple_assign_rhs_code (stmt)) -+ && TREE_CODE_CLASS (rhs_code) == tcc_binary -+ && commutative_tree_code (rhs_code) -+ && oprnd0_info->first_dt == dt -+ && oprnd_info->first_dt == dt_op0 -+ && def_op0 && def -+ && !(oprnd0_info->first_def_type -+ && !types_compatible_p (oprnd0_info->first_def_type, -+ TREE_TYPE (def))) -+ && !(oprnd_info->first_def_type -+ && !types_compatible_p (oprnd_info->first_def_type, -+ TREE_TYPE (def_op0)))) -+ { -+ if (vect_print_dump_info (REPORT_SLP)) -+ { -+ fprintf (vect_dump, "Swapping operands of "); -+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); -+ } + extern LOC find_bb_location (basic_block); + extern bb_vec_info vect_slp_analyze_bb (basic_block); + extern void vect_slp_transform_bb (basic_block); +@@ -889,9 +916,9 @@ + /* Pattern recognition functions. 
+ Additional pattern recognition functions can (and will) be added + in the future. */ +-typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); +-#define NUM_PATTERNS 4 +-void vect_pattern_recog (loop_vec_info); ++typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); ++#define NUM_PATTERNS 7 ++void vect_pattern_recog (loop_vec_info, bb_vec_info); -- return false; -+ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), -+ gimple_assign_rhs2_ptr (stmt)); -+ } -+ else -+ { -+ if (vect_print_dump_info (REPORT_SLP)) -+ fprintf (vect_dump, "Build SLP failed: different types "); -+ -+ return false; -+ } - } - } - } + /* In tree-vectorizer.c. */ + unsigned vectorize_loops (void); +--- a/src/gcc/tree-vect-patterns.c ++++ b/src/gcc/tree-vect-patterns.c +@@ -38,33 +38,94 @@ + #include "recog.h" + #include "diagnostic-core.h" - /* Check the types of the definitions. */ -- switch (dt[i]) -+ switch (dt) - { - case vect_constant_def: - case vect_external_def: -+ case vect_reduction_def: - break; +-/* Function prototypes */ +-static void vect_pattern_recog_1 +- (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator); +-static bool widened_name_p (tree, gimple, tree *, gimple *); +- + /* Pattern recognition functions */ +-static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *); +-static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *); +-static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *); +-static gimple vect_recog_pow_pattern (gimple, tree *, tree *); ++static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); ++static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, ++ tree *); ++static 
gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, ++ tree *, tree *); ++static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, ++ tree *, tree *); + static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { + vect_recog_widen_mult_pattern, + vect_recog_widen_sum_pattern, + vect_recog_dot_prod_pattern, +- vect_recog_pow_pattern}; ++ vect_recog_pow_pattern, ++ vect_recog_widen_shift_pattern, ++ vect_recog_over_widening_pattern, ++ vect_recog_mixed_size_cond_pattern}; - case vect_internal_def: -- case vect_reduction_def: -- if (i == 0) -- VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); -+ if (different_types) -+ { -+ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); -+ oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); -+ if (i == 0) -+ VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt); -+ else -+ VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt); -+ } - else -- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); -+ VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt); - break; - default: -@@ -309,17 +453,13 @@ - int ncopies_for_cost, unsigned int *max_nunits, - VEC (int, heap) **load_permutation, - VEC (slp_tree, heap) **loads, -- unsigned int vectorization_factor) -+ unsigned int vectorization_factor, bool *loads_permuted) - { -- VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); -- VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); - unsigned int i; - VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); - gimple stmt = VEC_index (gimple, stmts, 0); -- enum vect_def_type first_stmt_dt0 = vect_uninitialized_def; -- enum vect_def_type first_stmt_dt1 = vect_uninitialized_def; - enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; -- tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE; -+ enum tree_code first_cond_code = ERROR_MARK; - tree lhs; - bool stop_recursion = false, need_same_oprnds = false; - 
tree vectype, scalar_type, first_op1 = NULL_TREE; -@@ -328,13 +468,28 @@ - int icode; - enum machine_mode optab_op2_mode; - enum machine_mode vec_mode; -- tree first_stmt_const_oprnd = NULL_TREE; - struct data_reference *first_dr; -- bool pattern0 = false, pattern1 = false; - HOST_WIDE_INT dummy; - bool permutation = false; - unsigned int load_place; - gimple first_load, prev_first_load = NULL; -+ VEC (slp_oprnd_info, heap) *oprnds_info; -+ unsigned int nops; -+ slp_oprnd_info oprnd_info; -+ tree cond; +-/* Function widened_name_p ++/* Check whether STMT2 is in the same loop or basic block as STMT1. ++ Which of the two applies depends on whether we're currently doing ++ loop-based or basic-block-based vectorization, as determined by ++ the vinfo_for_stmt for STMT1 (which must be defined). + +- Check whether NAME, an ssa-name used in USE_STMT, +- is a result of a type-promotion, such that: +- DEF_STMT: NAME = NOP (name0) +- where the type of name0 (HALF_TYPE) is smaller than the type of NAME. +-*/ ++ If this returns true, vinfo_for_stmt for STMT2 is guaranteed ++ to be defined as well. 
*/ + + static bool +-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt) ++vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2) ++{ ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1); ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + -+ if (is_gimple_call (stmt)) -+ nops = gimple_call_num_args (stmt); -+ else if (is_gimple_assign (stmt)) ++ if (!gimple_bb (stmt2)) ++ return false; ++ ++ if (loop_vinfo) + { -+ nops = gimple_num_ops (stmt) - 1; -+ if (gimple_assign_rhs_code (stmt) == COND_EXPR) -+ nops = 4; ++ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); ++ if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2))) ++ return false; + } + else -+ return false; ++ { ++ if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo) ++ || gimple_code (stmt2) == GIMPLE_PHI) ++ return false; ++ } + -+ oprnds_info = vect_create_oprnd_info (nops, group_size); ++ gcc_assert (vinfo_for_stmt (stmt2)); ++ return true; ++} ++ ++/* If the LHS of DEF_STMT has a single use, and that statement is ++ in the same loop or basic block, return it. */ ++ ++static gimple ++vect_single_imm_use (gimple def_stmt) ++{ ++ tree lhs = gimple_assign_lhs (def_stmt); ++ use_operand_p use_p; ++ gimple use_stmt; ++ ++ if (!single_imm_use (lhs, &use_p, &use_stmt)) ++ return NULL; ++ ++ if (!vect_same_loop_or_bb_p (def_stmt, use_stmt)) ++ return NULL; ++ ++ return use_stmt; ++} ++ ++/* Check whether NAME, an ssa-name used in USE_STMT, ++ is a result of a type promotion or demotion, such that: ++ DEF_STMT: NAME = NOP (name0) ++ where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME. ++ If CHECK_SIGN is TRUE, check that either both types are signed or both are ++ unsigned. 
*/ ++ ++static bool ++type_conversion_p (tree name, gimple use_stmt, bool check_sign, ++ tree *orig_type, gimple *def_stmt, bool *promotion) + { + tree dummy; + gimple dummy_gimple; +@@ -74,35 +135,43 @@ + tree oprnd0; + enum vect_def_type dt; + tree def; ++ bb_vec_info bb_vinfo; - /* For every stmt in NODE find its def stmt/s. */ - FOR_EACH_VEC_ELT (gimple, stmts, i, stmt) -@@ -355,6 +510,7 @@ - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } + stmt_vinfo = vinfo_for_stmt (use_stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); -+ vect_free_oprnd_info (&oprnds_info); - return false; - } +- if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt)) ++ if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt)) + return false; -@@ -364,13 +520,30 @@ - if (vect_print_dump_info (REPORT_SLP)) - { - fprintf (vect_dump, -- "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL"); -+ "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL "); - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } + if (dt != vect_internal_def + && dt != vect_external_def && dt != vect_constant_def) + return false; -+ vect_free_oprnd_info (&oprnds_info); - return false; - } +- if (! 
*def_stmt) ++ if (!*def_stmt) + return false; -+ if (is_gimple_assign (stmt) -+ && gimple_assign_rhs_code (stmt) == COND_EXPR -+ && (cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) -+ && !COMPARISON_CLASS_P (cond)) -+ { -+ if (vect_print_dump_info (REPORT_SLP)) -+ { -+ fprintf (vect_dump, -+ "Build SLP failed: condition is not comparison "); -+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); -+ } -+ -+ vect_free_oprnd_info (&oprnds_info); -+ return false; -+ } -+ - scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); - vectype = get_vectype_for_scalar_type (scalar_type); - if (!vectype) -@@ -380,23 +553,20 @@ - fprintf (vect_dump, "Build SLP failed: unsupported data-type "); - print_generic_expr (vect_dump, scalar_type, TDF_SLIM); - } -+ -+ vect_free_oprnd_info (&oprnds_info); - return false; - } + if (!is_gimple_assign (*def_stmt)) + return false; -- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); -- if (ncopies != 1) -+ /* In case of multiple types we need to detect the smallest type. */ -+ if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) - { -- if (vect_print_dump_info (REPORT_SLP)) -- fprintf (vect_dump, "SLP with multiple types "); -- -- /* FORNOW: multiple types are unsupported in BB SLP. */ -- if (bb_vinfo) -- return false; -+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); -+ if (bb_vinfo) -+ vectorization_factor = *max_nunits; - } +- if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR) ++ if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt))) + return false; -- /* In case of multiple types we need to detect the smallest type. 
*/ -- if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) -- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); -+ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); + oprnd0 = gimple_assign_rhs1 (*def_stmt); - if (is_gimple_call (stmt)) - rhs_code = CALL_EXPR; -@@ -431,6 +601,7 @@ - { - if (vect_print_dump_info (REPORT_SLP)) - fprintf (vect_dump, "Build SLP failed: no optab."); -+ vect_free_oprnd_info (&oprnds_info); - return false; - } - icode = (int) optab_handler (optab, vec_mode); -@@ -439,6 +610,7 @@ - if (vect_print_dump_info (REPORT_SLP)) - fprintf (vect_dump, "Build SLP failed: " - "op not supported by target."); -+ vect_free_oprnd_info (&oprnds_info); - return false; - } - optab_op2_mode = insn_data[icode].operand[2].mode; -@@ -449,6 +621,11 @@ - } - } - } -+ else if (rhs_code == WIDEN_LSHIFT_EXPR) -+ { -+ need_same_oprnds = true; -+ first_op1 = gimple_assign_rhs2 (stmt); -+ } - } - else - { -@@ -470,6 +647,7 @@ - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } +- *half_type = TREE_TYPE (oprnd0); +- if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) +- || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) +- || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) ++ *orig_type = TREE_TYPE (oprnd0); ++ if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type) ++ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign)) ++ return false; ++ ++ if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2)) ++ *promotion = true; ++ else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2)) ++ *promotion = false; ++ else + return false; -+ vect_free_oprnd_info (&oprnds_info); - return false; - } +- if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy, ++ if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy, + &dt)) + return false; -@@ -483,6 +661,7 @@ - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } +@@ -145,9 +214,9 @@ -+ vect_free_oprnd_info 
(&oprnds_info); - return false; - } - } -@@ -494,15 +673,12 @@ - { - /* Store. */ - if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, -- stmt, &def_stmts0, &def_stmts1, -- &first_stmt_dt0, -- &first_stmt_dt1, -- &first_stmt_def0_type, -- &first_stmt_def1_type, -- &first_stmt_const_oprnd, -- ncopies_for_cost, -- &pattern0, &pattern1)) -- return false; -+ stmt, ncopies_for_cost, -+ (i == 0), &oprnds_info)) -+ { -+ vect_free_oprnd_info (&oprnds_info); -+ return false; -+ } - } - else - { -@@ -520,12 +696,15 @@ - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } + Input: -+ vect_free_oprnd_info (&oprnds_info); - return false; - } +- * LAST_STMT: A stmt from which the pattern search begins. In the example, +- when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be +- detected. ++ * STMTS: Contains a stmt from which the pattern search begins. In the ++ example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} ++ will be detected. - /* Check that the size of interleaved loads group is not - greater than the SLP group size. */ -- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) -+ if (loop_vinfo -+ && DR_GROUP_SIZE (vinfo_for_stmt (stmt)) -+ > ncopies * group_size) - { - if (vect_print_dump_info (REPORT_SLP)) - { -@@ -535,6 +714,7 @@ - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } + Output: -+ vect_free_oprnd_info (&oprnds_info); - return false; - } +@@ -168,9 +237,10 @@ + inner-loop nested in an outer-loop that us being vectorized). 
*/ -@@ -555,6 +735,7 @@ - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } - -+ vect_free_oprnd_info (&oprnds_info); - return false; - } - } -@@ -574,12 +755,13 @@ - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } + static gimple +-vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) + { +- gimple stmt; ++ gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); + tree oprnd0, oprnd1; + tree oprnd00, oprnd01; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); +@@ -178,8 +248,14 @@ + gimple pattern_stmt; + tree prod_type; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *loop; + tree var, rhs; ++ bool promotion; ++ ++ if (!loop_info) ++ return NULL; ++ ++ loop = LOOP_VINFO_LOOP (loop_info); -+ vect_free_oprnd_info (&oprnds_info); - return false; - } + if (!is_gimple_assign (last_stmt)) + return NULL; +@@ -238,7 +314,9 @@ + return NULL; + stmt = last_stmt; - /* Analyze costs (for the first stmt in the group). 
*/ - vect_model_load_cost (vinfo_for_stmt (stmt), -- ncopies_for_cost, *node); -+ ncopies_for_cost, false, *node); - } +- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt)) ++ if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt, ++ &promotion) ++ && promotion) + { + stmt = def_stmt; + oprnd0 = gimple_assign_rhs1 (stmt); +@@ -293,10 +371,14 @@ + if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) + || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) + return NULL; +- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) ++ if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt, ++ &promotion) ++ || !promotion) + return NULL; + oprnd00 = gimple_assign_rhs1 (def_stmt); +- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt)) ++ if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt, ++ &promotion) ++ || !promotion) + return NULL; + oprnd01 = gimple_assign_rhs1 (def_stmt); + if (!types_compatible_p (half_type0, half_type1)) +@@ -327,6 +409,88 @@ + return pattern_stmt; + } - /* Store the place of this load in the interleaving chain. In -@@ -601,7 +783,7 @@ - { - if (TREE_CODE_CLASS (rhs_code) == tcc_reference) - { -- /* Not strided load. */ -+ /* Not strided load. */ - if (vect_print_dump_info (REPORT_SLP)) - { - fprintf (vect_dump, "Build SLP failed: not strided load "); -@@ -609,12 +791,14 @@ - } ++ ++/* Handle widening operation by a constant. At the moment we support MULT_EXPR ++ and LSHIFT_EXPR. ++ ++ For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR ++ we check that CONST_OPRND is less or equal to the size of HALF_TYPE. 
++ ++ Otherwise, if the type of the result (TYPE) is at least 4 times bigger than ++ HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) ++ that satisfies the above restrictions, we can perform a widening opeartion ++ from the intermediate type to TYPE and replace a_T = (TYPE) a_t; ++ with a_it = (interm_type) a_t; */ ++ ++static bool ++vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, ++ tree const_oprnd, tree *oprnd, ++ VEC (gimple, heap) **stmts, tree type, ++ tree *half_type, gimple def_stmt) ++{ ++ tree new_type, new_oprnd, tmp; ++ gimple new_stmt; ++ ++ if (code != MULT_EXPR && code != LSHIFT_EXPR) ++ return false; ++ ++ if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) ++ || (code == LSHIFT_EXPR ++ && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) ++ != 1)) ++ && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) ++ { ++ /* CONST_OPRND is a constant of HALF_TYPE. */ ++ *oprnd = gimple_assign_rhs1 (def_stmt); ++ return true; ++ } ++ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)) ++ return false; ++ ++ if (!vect_same_loop_or_bb_p (stmt, def_stmt)) ++ return false; ++ ++ /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for ++ a type 2 times bigger than HALF_TYPE. */ ++ new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, ++ TYPE_UNSIGNED (type)); ++ if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) ++ || (code == LSHIFT_EXPR ++ && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) ++ return false; ++ ++ /* Use NEW_TYPE for widening operation. */ ++ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) ++ { ++ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); ++ /* Check if the already created pattern stmt is what we need. 
*/ ++ if (!is_gimple_assign (new_stmt) ++ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR ++ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) ++ return false; ++ ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); ++ *oprnd = gimple_assign_lhs (new_stmt); ++ } ++ else ++ { ++ /* Create a_T = (NEW_TYPE) a_t; */ ++ *oprnd = gimple_assign_rhs1 (def_stmt); ++ tmp = create_tmp_var (new_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, ++ NULL_TREE); ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); ++ *oprnd = new_oprnd; ++ } ++ ++ *half_type = new_type; ++ return true; ++} ++ + /* Function vect_recog_widen_mult_pattern - /* FORNOW: Not strided loads are not supported. */ -+ vect_free_oprnd_info (&oprnds_info); - return false; - } + Try to find the following pattern: +@@ -342,37 +506,81 @@ - /* Not memory operation. */ - if (TREE_CODE_CLASS (rhs_code) != tcc_binary -- && TREE_CODE_CLASS (rhs_code) != tcc_unary) -+ && TREE_CODE_CLASS (rhs_code) != tcc_unary -+ && rhs_code != COND_EXPR) - { - if (vect_print_dump_info (REPORT_SLP)) - { -@@ -623,19 +807,38 @@ - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } + where type 'TYPE' is at least double the size of type 'type'. -+ vect_free_oprnd_info (&oprnds_info); - return false; - } +- Input: ++ Also detect unsgigned cases: -+ if (rhs_code == COND_EXPR) -+ { -+ tree cond_expr = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); +- * LAST_STMT: A stmt from which the pattern search begins. In the example, +- when this function is called with S5, the pattern {S3,S4,S5} is be detected. 
++ unsigned type a_t, b_t; ++ unsigned TYPE u_prod_T; ++ TYPE a_T, b_T, prod_T; + -+ if (i == 0) -+ first_cond_code = TREE_CODE (cond_expr); -+ else if (first_cond_code != TREE_CODE (cond_expr)) -+ { -+ if (vect_print_dump_info (REPORT_SLP)) -+ { -+ fprintf (vect_dump, "Build SLP failed: different" -+ " operation"); -+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); -+ } ++ S1 a_t = ; ++ S2 b_t = ; ++ S3 a_T = (TYPE) a_t; ++ S4 b_T = (TYPE) b_t; ++ S5 prod_T = a_T * b_T; ++ S6 u_prod_T = (unsigned TYPE) prod_T; + -+ vect_free_oprnd_info (&oprnds_info); -+ return false; -+ } -+ } ++ and multiplication by constants: + - /* Find the def-stmts. */ - if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, -- &def_stmts0, &def_stmts1, -- &first_stmt_dt0, &first_stmt_dt1, -- &first_stmt_def0_type, -- &first_stmt_def1_type, -- &first_stmt_const_oprnd, -- ncopies_for_cost, -- &pattern0, &pattern1)) -- return false; -+ ncopies_for_cost, (i == 0), -+ &oprnds_info)) -+ { -+ vect_free_oprnd_info (&oprnds_info); -+ return false; -+ } - } - } - -@@ -646,61 +849,55 @@ - /* Strided loads were reached - stop the recursion. */ - if (stop_recursion) - { -+ VEC_safe_push (slp_tree, heap, *loads, *node); - if (permutation) - { -- VEC_safe_push (slp_tree, heap, *loads, *node); ++ type a_t; ++ TYPE a_T, prod_T; + -+ *loads_permuted = true; - *inside_cost - += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) - * group_size; - } - else -- { -- /* We don't check here complex numbers chains, so we keep them in -- LOADS for further check in vect_supported_load_permutation_p. */ -+ { -+ /* We don't check here complex numbers chains, so we set -+ LOADS_PERMUTED for further check in -+ vect_supported_load_permutation_p. 
*/ - if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR) -- VEC_safe_push (slp_tree, heap, *loads, *node); -+ *loads_permuted = true; - } ++ S1 a_t = ; ++ S3 a_T = (TYPE) a_t; ++ S5 prod_T = a_T * CONST; ++ ++ A special case of multiplication by constants is when 'TYPE' is 4 times ++ bigger than 'type', but CONST fits an intermediate type 2 times smaller ++ than 'TYPE'. In that case we create an additional pattern stmt for S3 ++ to create a variable of the intermediate type, and perform widen-mult ++ on the intermediate type as well: ++ ++ type a_t; ++ interm_type a_it; ++ TYPE a_T, prod_T, prod_T'; ++ ++ S1 a_t = ; ++ S3 a_T = (TYPE) a_t; ++ '--> a_it = (interm_type) a_t; ++ S5 prod_T = a_T * CONST; ++ '--> prod_T' = a_it w* CONST; ++ ++ Input/Output: ++ ++ * STMTS: Contains a stmt from which the pattern search begins. In the ++ example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} ++ is detected. In case of unsigned widen-mult, the original stmt (S5) is ++ replaced with S6 in STMTS. In case of multiplication by a constant ++ of an intermediate type (the last case above), STMTS also contains S3 ++ (inserted before S5). -+ vect_free_oprnd_info (&oprnds_info); - return true; - } + Output: - /* Create SLP_TREE nodes for the definition node/s. 
*/ -- if (first_stmt_dt0 == vect_internal_def) -+ FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info) - { -- slp_tree left_node = XNEW (struct _slp_tree); -- SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0; -- SLP_TREE_VEC_STMTS (left_node) = NULL; -- SLP_TREE_LEFT (left_node) = NULL; -- SLP_TREE_RIGHT (left_node) = NULL; -- SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0; -- SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0; -- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, -- inside_cost, outside_cost, ncopies_for_cost, -- max_nunits, load_permutation, loads, -- vectorization_factor)) -- return false; -+ slp_tree child; + * TYPE_IN: The type of the input arguments to the pattern. -- SLP_TREE_LEFT (*node) = left_node; -- } -+ if (oprnd_info->first_dt != vect_internal_def) -+ continue; +- * TYPE_OUT: The type of the output of this pattern. ++ * TYPE_OUT: The type of the output of this pattern. -- if (first_stmt_dt1 == vect_internal_def) -- { -- slp_tree right_node = XNEW (struct _slp_tree); -- SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1; -- SLP_TREE_VEC_STMTS (right_node) = NULL; -- SLP_TREE_LEFT (right_node) = NULL; -- SLP_TREE_RIGHT (right_node) = NULL; -- SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0; -- SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0; -- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, -+ child = vect_create_new_slp_node (oprnd_info->def_stmts); -+ if (!child -+ || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, - inside_cost, outside_cost, ncopies_for_cost, - max_nunits, load_permutation, loads, -- vectorization_factor)) -- return false; -+ vectorization_factor, loads_permuted)) -+ { -+ if (child) -+ oprnd_info->def_stmts = NULL; -+ vect_free_slp_tree (child); -+ vect_free_oprnd_info (&oprnds_info); -+ return false; -+ } + * Return value: A new stmt that will be used to replace the sequence of +- stmts that constitute the pattern. 
In this case it will be: ++ stmts that constitute the pattern. In this case it will be: + WIDEN_MULT + */ -- SLP_TREE_RIGHT (*node) = right_node; -+ oprnd_info->def_stmts = NULL; -+ VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child); - } + static gimple +-vect_recog_widen_mult_pattern (gimple last_stmt, +- tree *type_in, +- tree *type_out) ++vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) + { ++ gimple last_stmt = VEC_pop (gimple, *stmts); + gimple def_stmt0, def_stmt1; + tree oprnd0, oprnd1; + tree type, half_type0, half_type1; + gimple pattern_stmt; +- tree vectype, vectype_out; ++ tree vectype, vectype_out = NULL_TREE; + tree dummy; + tree var; + enum tree_code dummy_code; + int dummy_int; + VEC (tree, heap) *dummy_vec; ++ bool op1_ok; ++ bool promotion; -+ vect_free_oprnd_info (&oprnds_info); - return true; - } + if (!is_gimple_assign (last_stmt)) + return NULL; +@@ -391,15 +599,58 @@ + || !types_compatible_p (TREE_TYPE (oprnd1), type)) + return NULL; -@@ -710,6 +907,7 @@ - { - int i; - gimple stmt; -+ slp_void_p child; +- /* Check argument 0 */ +- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) ++ /* Check argument 0. */ ++ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, ++ &promotion) ++ || !promotion) + return NULL; +- oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ /* Check argument 1. 
*/ ++ op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1, ++ &def_stmt1, &promotion); ++ if (op1_ok && promotion) ++ { ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ oprnd1 = gimple_assign_rhs1 (def_stmt1); ++ } ++ else ++ { ++ if (TREE_CODE (oprnd1) == INTEGER_CST ++ && TREE_CODE (half_type0) == INTEGER_TYPE ++ && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, ++ &oprnd0, stmts, type, ++ &half_type0, def_stmt0)) ++ half_type1 = half_type0; ++ else ++ return NULL; ++ } - if (!node) - return; -@@ -722,8 +920,8 @@ - } - fprintf (vect_dump, "\n"); +- /* Check argument 1 */ +- if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) +- return NULL; +- oprnd1 = gimple_assign_rhs1 (def_stmt1); ++ /* Handle unsigned case. Look for ++ S6 u_prod_T = (unsigned TYPE) prod_T; ++ Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ ++ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) ++ { ++ gimple use_stmt; ++ tree use_lhs; ++ tree use_type; ++ ++ if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) ++ return NULL; ++ ++ use_stmt = vect_single_imm_use (last_stmt); ++ if (!use_stmt || !is_gimple_assign (use_stmt) ++ || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) ++ return NULL; ++ ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ if (!INTEGRAL_TYPE_P (use_type) ++ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) ++ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) ++ return NULL; ++ ++ type = use_type; ++ last_stmt = use_stmt; ++ } + + if (!types_compatible_p (half_type0, half_type1)) + return NULL; +@@ -431,6 +682,7 @@ + if (vect_print_dump_info (REPORT_DETAILS)) + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); -- vect_print_slp_tree (SLP_TREE_LEFT (node)); -- vect_print_slp_tree (SLP_TREE_RIGHT (node)); -+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) -+ vect_print_slp_tree ((slp_tree) child); ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); + return 
pattern_stmt; } +@@ -462,8 +714,9 @@ + */ -@@ -737,6 +935,7 @@ + static gimple +-vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) { - int i; ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); + tree fn, base, exp = NULL; gimple stmt; -+ slp_void_p child; - - if (!node) - return; -@@ -745,8 +944,8 @@ - if (j < 0 || i == j) - STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; - -- vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j); -- vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j); -+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) -+ vect_mark_slp_stmts ((slp_tree) child, mark, j); - } - + tree var; +@@ -574,16 +827,24 @@ + inner-loop nested in an outer-loop that us being vectorized). */ -@@ -758,6 +957,7 @@ - int i; + static gimple +-vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out) ++vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) + { ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); gimple stmt; - stmt_vec_info stmt_info; -+ slp_void_p child; - - if (!node) - return; -@@ -770,8 +970,8 @@ - STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; - } - -- vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node)); -- vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node)); -+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) -+ vect_mark_slp_stmts_relevant ((slp_tree) child); - } + tree oprnd0, oprnd1; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + tree type, half_type; + gimple pattern_stmt; + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +- struct loop *loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *loop; + tree var; ++ bool promotion; ++ ++ if (!loop_info) ++ return NULL; ++ ++ loop = LOOP_VINFO_LOOP (loop_info); + if (!is_gimple_assign (last_stmt)) + return NULL; +@@ -612,14 +873,16 @@ + || !types_compatible_p (TREE_TYPE 
(oprnd1), type)) + return NULL; -@@ -844,12 +1044,13 @@ - gimple stmt; - VEC (gimple, heap) *tmp_stmts; - unsigned int index, i; -+ slp_void_p child; +- /* So far so good. Since last_stmt was detected as a (summation) reduction, ++ /* So far so good. Since last_stmt was detected as a (summation) reduction, + we know that oprnd1 is the reduction variable (defined by a loop-header + phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. + Left to check that oprnd0 is defined by a cast from type 'type' to type + 'TYPE'. */ - if (!node) - return; +- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt)) +- return NULL; ++ if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt, ++ &promotion) ++ || !promotion) ++ return NULL; -- vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation); -- vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation); -+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) -+ vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation); + oprnd0 = gimple_assign_rhs1 (stmt); + *type_in = half_type; +@@ -641,10 +904,715 @@ + when doing outer-loop vectorization. */ + gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); - gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))); - tmp_stmts = VEC_alloc (gimple, heap, group_size); -@@ -881,8 +1082,10 @@ - bool supported, bad_permutation = false; - sbitmap load_index; - slp_tree node, other_complex_node; -- gimple stmt, first = NULL, other_node_first; -+ gimple stmt, first = NULL, other_node_first, load, next_load, first_load; - unsigned complex_numbers = 0; -+ struct data_reference *dr; -+ bb_vec_info bb_vinfo; ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); + return pattern_stmt; + } - /* FORNOW: permutations are only supported in SLP. */ - if (!slp_instn) -@@ -1042,6 +1245,76 @@ - } - } -+ /* In basic block vectorization we allow any subchain of an interleaving -+ chain. 
-+ FORNOW: not supported in loop SLP because of realignment compications. */ -+ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); -+ bad_permutation = false; -+ /* Check that for every node in the instance teh loads form a subchain. */ -+ if (bb_vinfo) ++/* Return TRUE if the operation in STMT can be performed on a smaller type. ++ ++ Input: ++ STMT - a statement to check. ++ DEF - we support operations with two operands, one of which is constant. ++ The other operand can be defined by a demotion operation, or by a ++ previous statement in a sequence of over-promoted operations. In the ++ later case DEF is used to replace that operand. (It is defined by a ++ pattern statement we created for the previous statement in the ++ sequence). ++ ++ Input/output: ++ NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not ++ NULL, it's the type of DEF. ++ STMTS - additional pattern statements. If a pattern statement (type ++ conversion) is created in this function, its original statement is ++ added to STMTS. ++ ++ Output: ++ OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new ++ operands to use in the new pattern statement for STMT (will be created ++ in vect_recog_over_widening_pattern ()). ++ NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern ++ statements for STMT: the first one is a type promotion and the second ++ one is the operation itself. We return the type promotion statement ++ in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of ++ the second pattern statement. 
*/ ++ ++static bool ++vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type, ++ tree *op0, tree *op1, gimple *new_def_stmt, ++ VEC (gimple, heap) **stmts) ++{ ++ enum tree_code code; ++ tree const_oprnd, oprnd; ++ tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type; ++ gimple def_stmt, new_stmt; ++ bool first = false; ++ bool promotion; ++ ++ *new_def_stmt = NULL; ++ ++ if (!is_gimple_assign (stmt)) ++ return false; ++ ++ code = gimple_assign_rhs_code (stmt); ++ if (code != LSHIFT_EXPR && code != RSHIFT_EXPR ++ && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) ++ return false; ++ ++ oprnd = gimple_assign_rhs1 (stmt); ++ const_oprnd = gimple_assign_rhs2 (stmt); ++ type = gimple_expr_type (stmt); ++ ++ if (TREE_CODE (oprnd) != SSA_NAME ++ || TREE_CODE (const_oprnd) != INTEGER_CST) ++ return false; ++ ++ /* If we are in the middle of a sequence, we use DEF from a previous ++ statement. Otherwise, OPRND has to be a result of type promotion. */ ++ if (*new_type) + { -+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ half_type = *new_type; ++ oprnd = def; ++ } ++ else ++ { ++ first = true; ++ if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt, ++ &promotion) ++ || !promotion ++ || !vect_same_loop_or_bb_p (stmt, def_stmt)) ++ return false; ++ } ++ ++ /* Can we perform the operation on a smaller type? */ ++ switch (code) ++ { ++ case BIT_IOR_EXPR: ++ case BIT_XOR_EXPR: ++ case BIT_AND_EXPR: ++ if (!int_fits_type_p (const_oprnd, half_type)) ++ { ++ /* HALF_TYPE is not enough. Try a bigger type if possible. */ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ if (!int_fits_type_p (const_oprnd, interm_type)) ++ return false; ++ } ++ ++ break; ++ ++ case LSHIFT_EXPR: ++ /* Try intermediate type - HALF_TYPE is not enough for sure. 
*/ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. ++ (e.g., if the original value was char, the shift amount is at most 8 ++ if we want to use short). */ ++ if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ ++ if (!vect_supportable_shift (code, interm_type)) ++ return false; ++ ++ break; ++ ++ case RSHIFT_EXPR: ++ if (vect_supportable_shift (code, half_type)) ++ break; ++ ++ /* Try intermediate type - HALF_TYPE is not supported. */ ++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) ++ return false; ++ ++ interm_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); ++ ++ if (!vect_supportable_shift (code, interm_type)) ++ return false; ++ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* There are four possible cases: ++ 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's ++ the first statement in the sequence) ++ a. The original, HALF_TYPE, is not enough - we replace the promotion ++ from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. ++ b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original ++ promotion. ++ 2. OPRND is defined by a pattern statement we created. ++ a. Its type is not sufficient for the operation, we create a new stmt: ++ a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store ++ this statement in NEW_DEF_STMT, and it is later put in ++ STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT. ++ b. OPRND is good to use in the new statement. 
*/ ++ if (first) ++ { ++ if (interm_type) + { -+ next_load = NULL; -+ first_load = NULL; -+ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load) ++ /* Replace the original type conversion HALF_TYPE->TYPE with ++ HALF_TYPE->INTERM_TYPE. */ ++ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) + { -+ if (!first_load) -+ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load)); -+ else if (first_load -+ != DR_GROUP_FIRST_DR (vinfo_for_stmt (load))) -+ { -+ bad_permutation = true; -+ break; -+ } ++ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); ++ /* Check if the already created pattern stmt is what we need. */ ++ if (!is_gimple_assign (new_stmt) ++ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR ++ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) ++ return false; ++ ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); ++ oprnd = gimple_assign_lhs (new_stmt); ++ } ++ else ++ { ++ /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */ ++ oprnd = gimple_assign_rhs1 (def_stmt); ++ tmp = create_tmp_reg (interm_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ oprnd, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; ++ VEC_safe_push (gimple, heap, *stmts, def_stmt); ++ oprnd = new_oprnd; ++ } ++ } ++ else ++ { ++ /* Retrieve the operand before the type promotion. */ ++ oprnd = gimple_assign_rhs1 (def_stmt); ++ } ++ } ++ else ++ { ++ if (interm_type) ++ { ++ /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ ++ tmp = create_tmp_reg (interm_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ oprnd, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt; ++ oprnd = new_oprnd; ++ *new_def_stmt = new_stmt; ++ } ++ ++ /* Otherwise, OPRND is already set. 
*/ ++ } ++ ++ if (interm_type) ++ *new_type = interm_type; ++ else ++ *new_type = half_type; ++ ++ *op0 = oprnd; ++ *op1 = fold_convert (*new_type, const_oprnd); ++ ++ return true; ++} ++ ++ ++/* Try to find a statement or a sequence of statements that can be performed ++ on a smaller type: ++ ++ type x_t; ++ TYPE x_T, res0_T, res1_T; ++ loop: ++ S1 x_t = *p; ++ S2 x_T = (TYPE) x_t; ++ S3 res0_T = op (x_T, C0); ++ S4 res1_T = op (res0_T, C1); ++ S5 ... = () res1_T; - type demotion ++ ++ where type 'TYPE' is at least double the size of type 'type', C0 and C1 are ++ constants. ++ Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either ++ be 'type' or some intermediate type. For now, we expect S5 to be a type ++ demotion operation. We also check that S3 and S4 have only one use. ++. ++ ++*/ ++static gimple ++vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) ++{ ++ gimple stmt = VEC_pop (gimple, *stmts); ++ gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL; ++ tree op0, op1, vectype = NULL_TREE, use_lhs, use_type; ++ tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; ++ bool first; ++ tree type = NULL; ++ ++ first = true; ++ while (1) ++ { ++ if (!vinfo_for_stmt (stmt) ++ || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) ++ return NULL; ++ ++ new_def_stmt = NULL; ++ if (!vect_operation_fits_smaller_type (stmt, var, &new_type, ++ &op0, &op1, &new_def_stmt, ++ stmts)) ++ { ++ if (first) ++ return NULL; ++ else ++ break; ++ } ++ ++ /* STMT can be performed on a smaller type. Check its uses. */ ++ use_stmt = vect_single_imm_use (stmt); ++ if (!use_stmt || !is_gimple_assign (use_stmt)) ++ return NULL; ++ ++ /* Create pattern statement for STMT. 
*/ ++ vectype = get_vectype_for_scalar_type (new_type); ++ if (!vectype) ++ return NULL; ++ ++ /* We want to collect all the statements for which we create pattern ++ statetments, except for the case when the last statement in the ++ sequence doesn't have a corresponding pattern statement. In such ++ case we associate the last pattern statement with the last statement ++ in the sequence. Therefore, we only add the original statement to ++ the list if we know that it is not the last. */ ++ if (prev_stmt) ++ VEC_safe_push (gimple, heap, *stmts, prev_stmt); ++ ++ var = vect_recog_temp_ssa_var (new_type, NULL); ++ pattern_stmt = gimple_build_assign_with_ops ( ++ gimple_assign_rhs_code (stmt), var, op0, op1); ++ SSA_NAME_DEF_STMT (var) = pattern_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; ++ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt; ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "created pattern stmt: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } ++ ++ type = gimple_expr_type (stmt); ++ prev_stmt = stmt; ++ stmt = use_stmt; ++ ++ first = false; ++ } ++ ++ /* We got a sequence. We expect it to end with a type demotion operation. ++ Otherwise, we quit (for now). There are three possible cases: the ++ conversion is to NEW_TYPE (we don't do anything), the conversion is to ++ a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and ++ NEW_TYPE differs (we create a new conversion statement). */ ++ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) ++ { ++ use_lhs = gimple_assign_lhs (use_stmt); ++ use_type = TREE_TYPE (use_lhs); ++ /* Support only type demotion or signedess change. */ ++ if (!INTEGRAL_TYPE_P (use_type) ++ || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type)) ++ return NULL; ++ ++ /* Check that NEW_TYPE is not bigger than the conversion result. 
*/ ++ if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) ++ return NULL; ++ ++ if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) ++ || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) ++ { ++ /* Create NEW_TYPE->USE_TYPE conversion. */ ++ tmp = create_tmp_reg (use_type, NULL); ++ add_referenced_var (tmp); ++ new_oprnd = make_ssa_name (tmp, NULL); ++ pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, ++ var, NULL_TREE); ++ SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt; ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; ++ ++ *type_in = get_vectype_for_scalar_type (new_type); ++ *type_out = get_vectype_for_scalar_type (use_type); ++ ++ /* We created a pattern statement for the last statement in the ++ sequence, so we don't need to associate it with the pattern ++ statement created for PREV_STMT. Therefore, we add PREV_STMT ++ to the list in order to mark it later in vect_pattern_recog_1. */ ++ if (prev_stmt) ++ VEC_safe_push (gimple, heap, *stmts, prev_stmt); ++ } ++ else ++ { ++ if (prev_stmt) ++ STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt)) ++ = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt)); ++ ++ *type_in = vectype; ++ *type_out = NULL_TREE; ++ } ++ ++ VEC_safe_push (gimple, heap, *stmts, use_stmt); ++ } ++ else ++ /* TODO: support general case, create a conversion to the correct type. */ ++ return NULL; ++ ++ /* Pattern detected. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } ++ ++ return pattern_stmt; ++} ++ ++ ++/* Detect widening shift pattern: ++ ++ type a_t; ++ TYPE a_T, res_T; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ S3 res_T = a_T << CONST; ++ ++ where type 'TYPE' is at least double the size of type 'type'. 
++ ++ Also detect cases where the shift result is immediately converted ++ to another type 'result_type' that is no larger in size than 'TYPE'. ++ In those cases we perform a widen-shift that directly results in ++ 'result_type', to avoid a possible over-widening situation: ++ ++ type a_t; ++ TYPE a_T, res_T; ++ result_type res_result; ++ ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ S3 res_T = a_T << CONST; ++ S4 res_result = (result_type) res_T; ++ '--> res_result' = a_t w<< CONST; ++ ++ And a case when 'TYPE' is 4 times bigger than 'type'. In that case we ++ create an additional pattern stmt for S2 to create a variable of an ++ intermediate type, and perform widen-shift on the intermediate type: + -+ if (j != 0 && next_load != load) -+ { -+ bad_permutation = true; -+ break; -+ } ++ type a_t; ++ interm_type a_it; ++ TYPE a_T, res_T, res_T'; + -+ next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load)); -+ } ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ '--> a_it = (interm_type) a_t; ++ S3 res_T = a_T << CONST; ++ '--> res_T' = a_it <<* CONST; + -+ if (bad_permutation) -+ break; -+ } ++ Input/Output: + -+ /* Check that the alignment of the first load in every subchain, i.e., -+ the first statement in every load node, is supported. */ -+ if (!bad_permutation) -+ { -+ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) -+ { -+ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); -+ if (first_load -+ != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load))) -+ { -+ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load)); -+ if (vect_supportable_dr_alignment (dr, false) -+ == dr_unaligned_unsupported) -+ { -+ if (vect_print_dump_info (REPORT_SLP)) -+ { -+ fprintf (vect_dump, "unsupported unaligned load "); -+ print_gimple_stmt (vect_dump, first_load, 0, -+ TDF_SLIM); -+ } -+ bad_permutation = true; -+ break; -+ } -+ } -+ } ++ * STMTS: Contains a stmt from which the pattern search begins. 
++ In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 ++ in STMTS. When an intermediate type is used and a pattern statement is ++ created for S2, we also put S2 here (before S3). + -+ if (!bad_permutation) -+ { -+ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); -+ return true; -+ } -+ } -+ } ++ Output: + - /* FORNOW: the only supported permutation is 0..01..1.. of length equal to - GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as - well (unless it's reduction). */ -@@ -1140,7 +1413,7 @@ - gimple stmt) - { - slp_instance new_instance; -- slp_tree node = XNEW (struct _slp_tree); -+ slp_tree node; - unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); - unsigned int unrolling_factor = 1, nunits; - tree vectype, scalar_type = NULL_TREE; -@@ -1151,6 +1424,8 @@ - VEC (int, heap) *load_permutation; - VEC (slp_tree, heap) *loads; - struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); -+ bool loads_permuted = false; -+ VEC (gimple, heap) *scalar_stmts; - - if (dr) - { -@@ -1180,7 +1455,6 @@ - if (loop_vinfo) - vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - else -- /* No multitypes in BB SLP. */ - vectorization_factor = nunits; - - /* Calculate the unrolling factor. */ -@@ -1195,39 +1469,31 @@ - } - - /* Create a node (a root of the SLP tree) for the packed strided stores. */ -- SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); -+ scalar_stmts = VEC_alloc (gimple, heap, group_size); - next = stmt; - if (dr) - { - /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. 
*/ - while (next) - { -- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); -+ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) -+ && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) -+ VEC_safe_push (gimple, heap, scalar_stmts, -+ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); -+ else -+ VEC_safe_push (gimple, heap, scalar_stmts, next); - next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); - } - } - else - { - /* Collect reduction statements. */ -- for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, -- next); -- i++) -- { -- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); -- if (vect_print_dump_info (REPORT_DETAILS)) -- { -- fprintf (vect_dump, "pushing reduction into node: "); -- print_gimple_stmt (vect_dump, next, 0, TDF_SLIM); -- } -- } -+ VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); -+ for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) -+ VEC_safe_push (gimple, heap, scalar_stmts, next); - } - -- SLP_TREE_VEC_STMTS (node) = NULL; -- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; -- SLP_TREE_LEFT (node) = NULL; -- SLP_TREE_RIGHT (node) = NULL; -- SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; -- SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; -+ node = vect_create_new_slp_node (scalar_stmts); - - /* Calculate the number of vector stmts to create based on the unrolling - factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is -@@ -1241,25 +1507,33 @@ - if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, - &inside_cost, &outside_cost, ncopies_for_cost, - &max_nunits, &load_permutation, &loads, -- vectorization_factor)) -+ vectorization_factor, &loads_permuted)) - { -- /* Create a new SLP instance. */ -- new_instance = XNEW (struct _slp_instance); -- SLP_INSTANCE_TREE (new_instance) = node; -- SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; -- /* Calculate the unrolling factor based on the smallest type in the -- loop. 
*/ -+ /* Calculate the unrolling factor based on the smallest type. */ - if (max_nunits > nunits) - unrolling_factor = least_common_multiple (max_nunits, group_size) - / group_size; - -+ if (unrolling_factor != 1 && !loop_vinfo) -+ { -+ if (vect_print_dump_info (REPORT_SLP)) -+ fprintf (vect_dump, "Build SLP failed: unrolling required in basic" -+ " block SLP"); -+ return false; -+ } ++ * TYPE_IN: The type of the input arguments to the pattern. + -+ /* Create a new SLP instance. */ -+ new_instance = XNEW (struct _slp_instance); -+ SLP_INSTANCE_TREE (new_instance) = node; -+ SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; - SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; - SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; - SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; - SLP_INSTANCE_LOADS (new_instance) = loads; - SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; - SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; -- if (VEC_length (slp_tree, loads)) ++ * TYPE_OUT: The type of the output of this pattern. 
+ -+ if (loads_permuted) - { - if (!vect_supported_load_permutation_p (new_instance, group_size, - load_permutation)) -@@ -1396,6 +1670,7 @@ - imm_use_iterator imm_iter; - gimple use_stmt; - stmt_vec_info stmt_vinfo; -+ slp_void_p child; - - if (!node) - return; -@@ -1413,8 +1688,8 @@ - == vect_reduction_def)) - vect_mark_slp_stmts (node, hybrid, i); - -- vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); -- vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); -+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) -+ vect_detect_hybrid_slp_stmts ((slp_tree) child); - } - - -@@ -1504,13 +1779,14 @@ - bool dummy; - int i; - gimple stmt; -+ slp_void_p child; - - if (!node) - return true; - -- if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node)) -- || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node))) -- return false; -+ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) -+ if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child)) -+ return false; - - FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt) - { -@@ -1661,42 +1937,18 @@ - - /* Check if the basic block can be vectorized. 
*/ - --bb_vec_info --vect_slp_analyze_bb (basic_block bb) -+static bb_vec_info -+vect_slp_analyze_bb_1 (basic_block bb) - { - bb_vec_info bb_vinfo; - VEC (ddr_p, heap) *ddrs; - VEC (slp_instance, heap) *slp_instances; - slp_instance instance; -- int i, insns = 0; -- gimple_stmt_iterator gsi; -+ int i; - int min_vf = 2; - int max_vf = MAX_VECTORIZATION_FACTOR; - bool data_dependence_in_bb = false; - -- current_vector_size = 0; -- -- if (vect_print_dump_info (REPORT_DETAILS)) -- fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); -- -- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) -- { -- gimple stmt = gsi_stmt (gsi); -- if (!is_gimple_debug (stmt) -- && !gimple_nop_p (stmt) -- && gimple_code (stmt) != GIMPLE_LABEL) -- insns++; -- } -- -- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) -- { -- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) -- fprintf (vect_dump, "not vectorized: too many instructions in basic " -- "block.\n"); -- -- return NULL; -- } -- - bb_vinfo = new_bb_vec_info (bb); - if (!bb_vinfo) - return NULL; -@@ -1722,6 +1974,8 @@ - return NULL; - } - -+ vect_pattern_recog (NULL, bb_vinfo); ++ * Return value: A new stmt that will be used to replace the sequence of ++ stmts that constitute the pattern. In this case it will be: ++ WIDEN_LSHIFT_EXPR . 
*/ + - if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, - &data_dependence_in_bb) - || min_vf > max_vf -@@ -1816,6 +2070,61 @@ - } - - -+bb_vec_info -+vect_slp_analyze_bb (basic_block bb) ++static gimple ++vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts, ++ tree *type_in, tree *type_out) +{ -+ bb_vec_info bb_vinfo; -+ int insns = 0; -+ gimple_stmt_iterator gsi; -+ unsigned int vector_sizes; ++ gimple last_stmt = VEC_pop (gimple, *stmts); ++ gimple def_stmt0; ++ tree oprnd0, oprnd1; ++ tree type, half_type0; ++ gimple pattern_stmt; ++ tree vectype, vectype_out = NULL_TREE; ++ tree dummy; ++ tree var; ++ enum tree_code dummy_code; ++ int dummy_int; ++ VEC (tree, heap) * dummy_vec; ++ gimple use_stmt; ++ bool promotion; + -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); ++ if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) ++ return NULL; + -+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) -+ { -+ gimple stmt = gsi_stmt (gsi); -+ if (!is_gimple_debug (stmt) -+ && !gimple_nop_p (stmt) -+ && gimple_code (stmt) != GIMPLE_LABEL) -+ insns++; -+ } ++ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) ++ return NULL; + -+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) ++ if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) ++ return NULL; ++ ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); ++ if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) ++ return NULL; ++ ++ /* Check operand 0: it has to be defined by a type promotion. */ ++ if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, ++ &promotion) ++ || !promotion) ++ return NULL; ++ ++ /* Check operand 1: has to be positive. We check that it fits the type ++ in vect_handle_widen_op_by_const (). 
*/ ++ if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) ++ return NULL; ++ ++ oprnd0 = gimple_assign_rhs1 (def_stmt0); ++ type = gimple_expr_type (last_stmt); ++ ++ /* Check for subsequent conversion to another type. */ ++ use_stmt = vect_single_imm_use (last_stmt); ++ if (use_stmt && is_gimple_assign (use_stmt) ++ && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)) ++ && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) + { -+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) -+ fprintf (vect_dump, "not vectorized: too many instructions in basic " -+ "block.\n"); ++ tree use_lhs = gimple_assign_lhs (use_stmt); ++ tree use_type = TREE_TYPE (use_lhs); + -+ return NULL; ++ if (INTEGRAL_TYPE_P (use_type) ++ && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type)) ++ { ++ last_stmt = use_stmt; ++ type = use_type; ++ } + } + -+ /* Autodetect first vector size we try. */ -+ current_vector_size = 0; -+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); ++ /* Check if this a widening operation. */ ++ if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, ++ &oprnd0, stmts, ++ type, &half_type0, def_stmt0)) ++ return NULL; ++ ++ /* Pattern detected. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: "); ++ ++ /* Check target support. */ ++ vectype = get_vectype_for_scalar_type (half_type0); ++ vectype_out = get_vectype_for_scalar_type (type); ++ ++ if (!vectype ++ || !vectype_out ++ || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, ++ vectype_out, vectype, ++ &dummy, &dummy, &dummy_code, ++ &dummy_code, &dummy_int, ++ &dummy_vec)) ++ return NULL; ++ ++ *type_in = vectype; ++ *type_out = vectype_out; ++ ++ /* Pattern supported. Create a stmt to be used to replace the pattern. 
*/ ++ var = vect_recog_temp_ssa_var (type, NULL); ++ pattern_stmt = ++ gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1); ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ ++ VEC_safe_push (gimple, heap, *stmts, last_stmt); ++ return pattern_stmt; ++} ++ ++/* Function vect_recog_mixed_size_cond_pattern ++ ++ Try to find the following pattern: ++ ++ type x_t, y_t; ++ TYPE a_T, b_T, c_T; ++ loop: ++ S1 a_T = x_t CMP y_t ? b_T : c_T; ++ ++ where type 'TYPE' is an integral type which has different size ++ from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider ++ than 'type', the constants need to fit into an integer type ++ with the same width as 'type') or results of conversion from 'type'. ++ ++ Input: ++ ++ * LAST_STMT: A stmt from which the pattern search begins. ++ ++ Output: ++ ++ * TYPE_IN: The type of the input arguments to the pattern. ++ ++ * TYPE_OUT: The type of the output of this pattern. ++ ++ * Return value: A new stmt that will be used to replace the pattern. ++ Additionally a def_stmt is added. ++ ++ a_it = x_t CMP y_t ? 
b_it : c_it; ++ a_T = (TYPE) a_it; */ ++ ++static gimple ++vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in, ++ tree *type_out) ++{ ++ gimple last_stmt = VEC_index (gimple, *stmts, 0); ++ tree cond_expr, then_clause, else_clause; ++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; ++ tree type, vectype, comp_vectype, comp_type, op, tmp; ++ enum machine_mode cmpmode; ++ gimple pattern_stmt, def_stmt; ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); ++ tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE; ++ gimple def_stmt0 = NULL, def_stmt1 = NULL; ++ bool promotion; ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); ++ ++ if (!is_gimple_assign (last_stmt) ++ || gimple_assign_rhs_code (last_stmt) != COND_EXPR ++ || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) ++ return NULL; ++ ++ op = gimple_assign_rhs1 (last_stmt); ++ cond_expr = TREE_OPERAND (op, 0); ++ then_clause = TREE_OPERAND (op, 1); ++ else_clause = TREE_OPERAND (op, 2); ++ ++ if (!COMPARISON_CLASS_P (cond_expr)) ++ return NULL; ++ ++ type = gimple_expr_type (last_stmt); ++ comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); ++ comp_vectype = get_vectype_for_scalar_type (comp_type); ++ if (comp_vectype == NULL_TREE) ++ return NULL; ++ ++ if (types_compatible_p (type, comp_type) ++ || !INTEGRAL_TYPE_P (comp_type) ++ || !INTEGRAL_TYPE_P (type)) ++ return NULL; ++ ++ if ((TREE_CODE (then_clause) != INTEGER_CST ++ && !type_conversion_p (then_clause, last_stmt, false, &orig_type0, ++ &def_stmt0, &promotion)) ++ || (TREE_CODE (else_clause) != INTEGER_CST ++ && !type_conversion_p (else_clause, last_stmt, false, &orig_type1, ++ &def_stmt1, &promotion))) ++ return NULL; ++ ++ if (orig_type0 && orig_type1 ++ && (!types_compatible_p (orig_type0, orig_type1) ++ || !types_compatible_p (orig_type0, comp_type))) ++ return NULL; ++ ++ if (orig_type0) ++ then_clause = gimple_assign_rhs1 (def_stmt0); ++ ++ if (orig_type1) ++ else_clause = 
gimple_assign_rhs1 (def_stmt1); ++ ++ cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); ++ if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) ++ return NULL; ++ ++ vectype = get_vectype_for_scalar_type (type); ++ if (vectype == NULL_TREE) ++ return NULL; ++ ++ if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype))) ++ return NULL; ++ ++ if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode) ++ && ((TREE_CODE (then_clause) == INTEGER_CST ++ && !int_fits_type_p (then_clause, comp_type)) ++ || (TREE_CODE (else_clause) == INTEGER_CST ++ && !int_fits_type_p (else_clause, comp_type)))) ++ return NULL; ++ ++ tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr), ++ fold_convert (comp_type, then_clause), ++ fold_convert (comp_type, else_clause)); ++ def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL), ++ tmp); ++ ++ pattern_stmt ++ = gimple_build_assign_with_ops (NOP_EXPR, ++ vect_recog_temp_ssa_var (type, NULL), ++ gimple_assign_lhs (def_stmt), NULL_TREE); ++ ++ STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); ++ set_vinfo_for_stmt (def_stmt, def_stmt_info); ++ STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype; ++ *type_in = vectype; ++ *type_out = vectype; ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: "); ++ ++ return pattern_stmt; ++} ++ + -+ while (1) -+ { -+ bb_vinfo = vect_slp_analyze_bb_1 (bb); -+ if (bb_vinfo) -+ return bb_vinfo; ++/* Mark statements that are involved in a pattern. 
*/ + -+ destroy_bb_vec_info (bb_vinfo); ++static inline void ++vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, ++ tree pattern_vectype) ++{ ++ stmt_vec_info pattern_stmt_info, def_stmt_info; ++ stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info); ++ gimple def_stmt; + -+ vector_sizes &= ~current_vector_size; -+ if (vector_sizes == 0 -+ || current_vector_size == 0) -+ return NULL; ++ set_vinfo_for_stmt (pattern_stmt, ++ new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo)); ++ gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); ++ pattern_stmt_info = vinfo_for_stmt (pattern_stmt); + -+ /* Try the next biggest vector size. */ -+ current_vector_size = 1 << floor_log2 (vector_sizes); -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "***** Re-trying analysis with " -+ "vector size %d\n", current_vector_size); ++ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; ++ STMT_VINFO_DEF_TYPE (pattern_stmt_info) ++ = STMT_VINFO_DEF_TYPE (orig_stmt_info); ++ STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; ++ STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; ++ STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; ++ STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info) ++ = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); ++ if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) ++ { ++ def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); ++ def_stmt_info = vinfo_for_stmt (def_stmt); ++ if (def_stmt_info == NULL) ++ { ++ def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); ++ set_vinfo_for_stmt (def_stmt, def_stmt_info); ++ } ++ gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); ++ STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; ++ STMT_VINFO_DEF_TYPE (def_stmt_info) ++ = STMT_VINFO_DEF_TYPE (orig_stmt_info); ++ if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) ++ 
STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; + } +} + -+ - /* SLP costs are calculated according to SLP instance unrolling factor (i.e., - the number of created vector stmts depends on the unrolling factor). - However, the actual number of vector stmts for every SLP node depends on -@@ -1939,15 +2248,15 @@ - - For example, we have two scalar operands, s1 and s2 (e.g., group of - strided accesses of size two), while NUNITS is four (i.e., four scalars -- of this type can be packed in a vector). The output vector will contain -- two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES -+ of this type can be packed in a vector). The output vector will contain -+ two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES - will be 2). + /* Function vect_pattern_recog_1 - If GROUP_SIZE > NUNITS, the scalars will be split into several vectors - containing the operands. + Input: +@@ -669,29 +1637,33 @@ - For example, NUNITS is four as before, and the group size is 8 -- (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and -+ (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and - {s5, s6, s7, s8}. 
*/ + static void + vect_pattern_recog_1 ( +- gimple (* vect_recog_func) (gimple, tree *, tree *), +- gimple_stmt_iterator si) ++ gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *), ++ gimple_stmt_iterator si, ++ VEC (gimple, heap) **stmts_to_replace) + { + gimple stmt = gsi_stmt (si), pattern_stmt; +- stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +- stmt_vec_info pattern_stmt_info; +- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ stmt_vec_info stmt_info; ++ loop_vec_info loop_vinfo; + tree pattern_vectype; + tree type_in, type_out; + enum tree_code code; + int i; + gimple next; - number_of_copies = least_common_multiple (nunits, group_size) / group_size; -@@ -1959,8 +2268,18 @@ - { - if (is_store) - op = gimple_assign_rhs1 (stmt); -- else -+ else if (gimple_assign_rhs_code (stmt) != COND_EXPR) - op = gimple_op (stmt, op_num + 1); -+ else -+ { -+ if (op_num == 0 || op_num == 1) -+ { -+ tree cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); -+ op = TREE_OPERAND (cond, op_num); -+ } -+ else -+ op = TREE_OPERAND (gimple_assign_rhs1 (stmt), op_num - 1); -+ } +- pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out); ++ VEC_truncate (gimple, *stmts_to_replace, 0); ++ VEC_quick_push (gimple, *stmts_to_replace, stmt); ++ pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out); + if (!pattern_stmt) + return; - if (reduc_index != -1) - { -@@ -2055,88 +2374,102 @@ - If the scalar definitions are loop invariants or constants, collect them and - call vect_get_constant_vectors() to create vector stmts. - Otherwise, the def-stmts must be already vectorized and the vectorized stmts -- must be stored in the LEFT/RIGHT node of SLP_NODE, and we call -- vect_get_slp_vect_defs() to retrieve them. -- If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from -- the right node. This is used when the second operand must remain scalar. 
*/ -+ must be stored in the corresponding child of SLP_NODE, and we call -+ vect_get_slp_vect_defs () to retrieve them. */ ++ stmt = VEC_last (gimple, *stmts_to_replace); ++ stmt_info = vinfo_for_stmt (stmt); ++ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ + if (VECTOR_MODE_P (TYPE_MODE (type_in))) + { + /* No need to check target support (already checked by the pattern + recognition function). */ +- if (type_out) +- gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out))); + pattern_vectype = type_out ? type_out : type_in; + } + else +@@ -736,22 +1708,32 @@ + } - void --vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node, -- VEC (tree,heap) **vec_oprnds0, -- VEC (tree,heap) **vec_oprnds1, int reduc_index) --{ -- gimple first_stmt; -- enum tree_code code; -- int number_of_vects; -+vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node, -+ VEC (slp_void_p, heap) **vec_oprnds, int reduc_index) -+{ -+ gimple first_stmt, first_def; -+ int number_of_vects = 0, i; -+ unsigned int child_index = 0; - HOST_WIDE_INT lhs_size_unit, rhs_size_unit; -+ slp_tree child = NULL; -+ VEC (tree, heap) *vec_defs; -+ tree oprnd, def_lhs; -+ bool vectorized_defs; + /* Mark the stmts that are involved in the pattern. */ +- gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT); +- set_vinfo_for_stmt (pattern_stmt, +- new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL)); +- pattern_stmt_info = vinfo_for_stmt (pattern_stmt); +- +- STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; +- STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info); +- STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; +- STMT_VINFO_IN_PATTERN_P (stmt_info) = true; +- STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt; ++ vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); - first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); -- /* The number of vector defs is determined by the number of vector statements -- in the node from which we get those statements. 
*/ -- if (SLP_TREE_LEFT (slp_node)) -- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node)); -- else -- { -- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); -- /* Number of vector stmts was calculated according to LHS in -- vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if -- necessary. See vect_get_smallest_scalar_type () for details. */ -- vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, -- &rhs_size_unit); -- if (rhs_size_unit != lhs_size_unit) -- { -- number_of_vects *= rhs_size_unit; -- number_of_vects /= lhs_size_unit; -- } -- } -+ FOR_EACH_VEC_ELT (tree, ops, i, oprnd) -+ { -+ /* For each operand we check if it has vectorized definitions in a child -+ node or we need to create them (for invariants and constants). We -+ check if the LHS of the first stmt of the next child matches OPRND. -+ If it does, we found the correct child. Otherwise, we call -+ vect_get_constant_vectors (), and not advance CHILD_INDEX in order -+ to check this child node for the next operand. */ -+ vectorized_defs = false; -+ if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index) + /* Patterns cannot be vectorized using SLP, because they change the order of + computation. */ +- FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) +- if (next == stmt) +- VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); ++ if (loop_vinfo) ++ FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) ++ if (next == stmt) ++ VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); ++ ++ /* It is possible that additional pattern stmts are created and inserted in ++ STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the ++ relevant statements. 
*/ ++ for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt) ++ && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1); ++ i++) ++ { ++ stmt_info = vinfo_for_stmt (stmt); ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); ++ if (vect_print_dump_info (REPORT_DETAILS)) + { -+ child = (slp_tree) VEC_index (slp_void_p, -+ SLP_TREE_CHILDREN (slp_node), -+ child_index); -+ first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0); ++ fprintf (vect_dump, "additional pattern stmt: "); ++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); ++ } + -+ /* In the end of a pattern sequence we have a use of the original stmt, -+ so we need to compare OPRND with the original def. */ -+ if (is_pattern_stmt_p (vinfo_for_stmt (first_def)) -+ && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt)) -+ && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt))) -+ first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); ++ vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); ++ } + } -- /* Allocate memory for vectorized defs. */ -- *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); -- -- /* SLP_NODE corresponds either to a group of stores or to a group of -- unary/binary operations. We don't call this function for loads. -- For reduction defs we call vect_get_constant_vectors(), since we are -- looking for initial loop invariant values. */ -- if (SLP_TREE_LEFT (slp_node) && reduc_index == -1) -- /* The defs are already vectorized. */ -- vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); -- else -- /* Build vectors from scalar defs. */ -- vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects, -- reduc_index); -- -- if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) -- /* Since we don't call this function with loads, this is a group of -- stores. */ -- return; -- -- /* For reductions, we only need initial values. 
*/ -- if (reduc_index != -1) -- return; -+ if (is_gimple_call (first_def)) -+ def_lhs = gimple_call_lhs (first_def); -+ else -+ def_lhs = gimple_assign_lhs (first_def); -- code = gimple_assign_rhs_code (first_stmt); -- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) -- return; -+ if (operand_equal_p (oprnd, def_lhs, 0)) -+ { -+ /* The number of vector defs is determined by the number of -+ vector statements in the node from which we get those -+ statements. */ -+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); -+ vectorized_defs = true; -+ child_index++; -+ } -+ } +@@ -761,8 +1743,8 @@ + LOOP_VINFO - a struct_loop_info of a loop in which we want to look for + computation idioms. -- /* The number of vector defs is determined by the number of vector statements -- in the node from which we get those statements. */ -- if (SLP_TREE_RIGHT (slp_node)) -- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node)); -- else -- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); -+ if (!vectorized_defs) -+ { -+ if (i == 0) -+ { -+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); -+ /* Number of vector stmts was calculated according to LHS in -+ vect_schedule_slp_instance (), fix it by replacing LHS with -+ RHS, if necessary. See vect_get_smallest_scalar_type () for -+ details. */ -+ vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, -+ &rhs_size_unit); -+ if (rhs_size_unit != lhs_size_unit) -+ { -+ number_of_vects *= rhs_size_unit; -+ number_of_vects /= lhs_size_unit; -+ } -+ } -+ } +- Output - for each computation idiom that is detected we insert a new stmt +- that provides the same functionality and that can be vectorized. We ++ Output - for each computation idiom that is detected we create a new stmt ++ that provides the same functionality and that can be vectorized. 
We + also record some information in the struct_stmt_info of the relevant + stmts, as explained below: -- *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects); -+ /* Allocate memory for vectorized defs. */ -+ vec_defs = VEC_alloc (tree, heap, number_of_vects); +@@ -777,79 +1759,113 @@ + S5: ... = ..use(a_0).. - - - -- if (SLP_TREE_RIGHT (slp_node)) -- /* The defs are already vectorized. */ -- vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); -- else -- /* Build vectors from scalar defs. */ -- vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects, -- -1); -+ /* For reduction defs we call vect_get_constant_vectors (), since we are -+ looking for initial loop invariant values. */ -+ if (vectorized_defs && reduc_index == -1) -+ /* The defs are already vectorized. */ -+ vect_get_slp_vect_defs (child, &vec_defs); -+ else -+ /* Build vectors from scalar defs. */ -+ vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i, -+ number_of_vects, reduc_index); -+ -+ VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs); + Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be +- represented by a single stmt. We then: +- - create a new stmt S6 that will replace the pattern. +- - insert the new stmt S6 before the last stmt in the pattern ++ represented by a single stmt. We then: ++ - create a new stmt S6 equivalent to the pattern (the stmt is not ++ inserted into the code) + - fill in the STMT_VINFO fields as follows: + + in_pattern_p related_stmt vec_stmt + S1: a_i = .... - - - + S2: a_2 = ..use(a_i).. - - - + S3: a_1 = ..use(a_2).. - - - +- > S6: a_new = .... - S4 - + S4: a_0 = ..use(a_1).. true S6 - ++ '---> S6: a_new = .... - S4 - + S5: ... = ..use(a_0).. - - - + + (the last stmt in the pattern (S4) and the new pattern stmt (S6) point +- to each other through the RELATED_STMT field). ++ to each other through the RELATED_STMT field). 
+ + S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead + of S4 because it will replace all its uses. Stmts {S1,S2,S3} will + remain irrelevant unless used by stmts other than S4. + + If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} +- (because they are marked as irrelevant). It will vectorize S6, and record +- a pointer to the new vector stmt VS6 both from S6 (as usual), and also +- from S4. We do that so that when we get to vectorizing stmts that use the +- def of S4 (like S5 that uses a_0), we'll know where to take the relevant +- vector-def from. S4 will be skipped, and S5 will be vectorized as usual: ++ (because they are marked as irrelevant). It will vectorize S6, and record ++ a pointer to the new vector stmt VS6 from S6 (as usual). ++ S4 will be skipped, and S5 will be vectorized as usual: + + in_pattern_p related_stmt vec_stmt + S1: a_i = .... - - - + S2: a_2 = ..use(a_i).. - - - + S3: a_1 = ..use(a_2).. - - - + > VS6: va_new = .... - - - +- S6: a_new = .... - S4 VS6 + S4: a_0 = ..use(a_1).. true S6 VS6 ++ '---> S6: a_new = .... - S4 VS6 + > VS5: ... = ..vuse(va_new).. - - - + S5: ... = ..use(a_0).. - - - + +- DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used ++ DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used + elsewhere), and we'll end up with: + + VS6: va_new = .... +- VS5: ... = ..vuse(va_new).. ++ VS5: ... = ..vuse(va_new).. + +- If vectorization does not succeed, DCE will clean S6 away (its def is +- not used), and we'll end up with the original sequence. +-*/ ++ In case of more than one pattern statements, e.g., widen-mult with ++ intermediate type: + -+ /* For reductions, we only need initial values. */ -+ if (reduc_index != -1) -+ return; ++ S1 a_t = ; ++ S2 a_T = (TYPE) a_t; ++ '--> S3: a_it = (interm_type) a_t; ++ S4 prod_T = a_T * CONST; ++ '--> S5: prod_T' = a_it w* CONST; ++ ++ there may be other users of a_T outside the pattern. 
In that case S2 will ++ be marked as relevant (as well as S3), and both S2 and S3 will be analyzed ++ and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will ++ be recorded in S3. */ + + void +-vect_pattern_recog (loop_vec_info loop_vinfo) ++vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) + { +- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); +- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); +- unsigned int nbbs = loop->num_nodes; ++ struct loop *loop; ++ basic_block *bbs, bb; ++ unsigned int nbbs; + gimple_stmt_iterator si; + unsigned int i, j; +- gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); ++ gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); ++ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); ++ gimple stmt; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== vect_pattern_recog ==="); + +- /* Scan through the loop stmts, applying the pattern recognition ++ if (loop_vinfo) ++ { ++ loop = LOOP_VINFO_LOOP (loop_vinfo); ++ bbs = LOOP_VINFO_BBS (loop_vinfo); ++ nbbs = loop->num_nodes; + } ++ else ++ { ++ bb = BB_VINFO_BB (bb_vinfo); ++ nbbs = 1; ++ bbs = XNEW (basic_block); ++ bbs[0] = bb; ++ } ++ ++ /* Scan through the stmts, applying the pattern recognition + functions starting at each stmt visited: */ + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { ++ if (bb_vinfo && (stmt = gsi_stmt (si)) ++ && vinfo_for_stmt (stmt) ++ && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) ++ continue; ++ + /* Scan over all generic vect_recog_xxx_pattern functions. 
*/ + for (j = 0; j < NUM_PATTERNS; j++) + { + vect_recog_func_ptr = vect_vect_recog_func_ptrs[j]; +- vect_pattern_recog_1 (vect_recog_func_ptr, si); ++ vect_pattern_recog_1 (vect_recog_func_ptr, si, ++ &stmts_to_replace); + } + } + } ++ ++ VEC_free (gimple, heap, stmts_to_replace); ++ if (bb_vinfo) ++ free (bbs); } - -- - /* Create NCOPIES permutation statements using the mask MASK_BYTES (by - building a vector of type MASK_TYPE from it) and two input vectors placed in - DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and -@@ -2453,14 +2786,14 @@ - tree vectype; - int i; - slp_tree loads_node; +--- a/src/gcc/tree-vect-slp.c ++++ b/src/gcc/tree-vect-slp.c +@@ -67,15 +67,16 @@ + static void + vect_free_slp_tree (slp_tree node) + { ++ int i; + slp_void_p child; - ++ if (!node) - return false; + return; -- vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance, -- vectorization_factor); -- vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance, -- vectorization_factor); +- if (SLP_TREE_LEFT (node)) +- vect_free_slp_tree (SLP_TREE_LEFT (node)); +- +- if (SLP_TREE_RIGHT (node)) +- vect_free_slp_tree (SLP_TREE_RIGHT (node)); + FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) -+ vect_schedule_slp_instance ((slp_tree) child, instance, -+ vectorization_factor); - - stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); - stmt_info = vinfo_for_stmt (stmt); -@@ -2507,8 +2840,11 @@ - /* Loads should be inserted before the first load. 
*/ - if (SLP_INSTANCE_FIRST_LOAD_STMT (instance) - && STMT_VINFO_STRIDED_ACCESS (stmt_info) -- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) -+ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)) -+ && SLP_INSTANCE_LOAD_PERMUTATION (instance)) - si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); -+ else if (is_pattern_stmt_p (stmt_info)) -+ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); - else - si = gsi_for_stmt (stmt); ++ vect_free_slp_tree ((slp_tree) child); ---- a/src/gcc/tree-vect-stmts.c -+++ b/src/gcc/tree-vect-stmts.c -@@ -42,6 +42,82 @@ - #include "langhooks.h" ++ VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node)); + VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node)); + + if (SLP_TREE_VEC_STMTS (node)) +@@ -96,46 +97,151 @@ + } -+/* Return a variable of type ELEM_TYPE[NELEMS]. */ +-/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that +- they are of a legal type and that they match the defs of the first stmt of +- the SLP group (stored in FIRST_STMT_...). */ ++/* Create an SLP node for SCALAR_STMTS. 
*/ + -+static tree -+create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems) ++static slp_tree ++vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts) +{ -+ return create_tmp_var (build_array_type_nelts (elem_type, nelems), -+ "vect_array"); ++ slp_tree node; ++ gimple stmt = VEC_index (gimple, scalar_stmts, 0); ++ unsigned int nops; ++ ++ if (is_gimple_call (stmt)) ++ nops = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ { ++ nops = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ nops = 4; ++ } ++ else ++ return NULL; ++ ++ node = XNEW (struct _slp_tree); ++ SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; ++ SLP_TREE_VEC_STMTS (node) = NULL; ++ SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops); ++ SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; ++ SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; ++ ++ return node; +} + -+/* ARRAY is an array of vectors created by create_vector_array. -+ Return an SSA_NAME for the vector in index N. The reference -+ is part of the vectorization of STMT and the vector is associated -+ with scalar destination SCALAR_DEST. */ + -+static tree -+read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest, -+ tree array, unsigned HOST_WIDE_INT n) ++/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each ++ operand. 
*/ ++static VEC (slp_oprnd_info, heap) * ++vect_create_oprnd_info (int nops, int group_size) +{ -+ tree vect_type, vect, vect_name, array_ref; -+ gimple new_stmt; -+ -+ gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE); -+ vect_type = TREE_TYPE (TREE_TYPE (array)); -+ vect = vect_create_destination_var (scalar_dest, vect_type); -+ array_ref = build4 (ARRAY_REF, vect_type, array, -+ build_int_cst (size_type_node, n), -+ NULL_TREE, NULL_TREE); ++ int i; ++ slp_oprnd_info oprnd_info; ++ VEC (slp_oprnd_info, heap) *oprnds_info; + -+ new_stmt = gimple_build_assign (vect, array_ref); -+ vect_name = make_ssa_name (vect, new_stmt); -+ gimple_assign_set_lhs (new_stmt, vect_name); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ mark_symbols_for_renaming (new_stmt); ++ oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops); ++ for (i = 0; i < nops; i++) ++ { ++ oprnd_info = XNEW (struct _slp_oprnd_info); ++ oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size); ++ oprnd_info->first_dt = vect_uninitialized_def; ++ oprnd_info->first_def_type = NULL_TREE; ++ oprnd_info->first_const_oprnd = NULL_TREE; ++ oprnd_info->first_pattern = false; ++ VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info); ++ } + -+ return vect_name; ++ return oprnds_info; +} + -+/* ARRAY is an array of vectors created by create_vector_array. -+ Emit code to store SSA_NAME VECT in index N of the array. -+ The store is part of the vectorization of STMT. */ ++ ++/* Free operands info. 
*/ + +static void -+write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect, -+ tree array, unsigned HOST_WIDE_INT n) ++vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info) +{ -+ tree array_ref; -+ gimple new_stmt; ++ int i; ++ slp_oprnd_info oprnd_info; + -+ array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array, -+ build_int_cst (size_type_node, n), -+ NULL_TREE, NULL_TREE); ++ FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info) ++ { ++ VEC_free (gimple, heap, oprnd_info->def_stmts); ++ XDELETE (oprnd_info); ++ } + -+ new_stmt = gimple_build_assign (array_ref, vect); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ mark_symbols_for_renaming (new_stmt); ++ VEC_free (slp_oprnd_info, heap, *oprnds_info); +} + -+/* PTR is a pointer to an array of type TYPE. Return a representation -+ of *PTR. The memory reference replaces those in FIRST_DR -+ (and its group). */ + -+static tree -+create_array_ref (tree type, tree ptr, struct data_reference *first_dr) -+{ -+ struct ptr_info_def *pi; -+ tree mem_ref, alias_ptr_type; ++/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that ++ they are of a valid type and that they match the defs of the first stmt of ++ the SLP group (stored in OPRNDS_INFO). 
*/ + + static bool + vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, + slp_tree slp_node, gimple stmt, +- VEC (gimple, heap) **def_stmts0, +- VEC (gimple, heap) **def_stmts1, +- enum vect_def_type *first_stmt_dt0, +- enum vect_def_type *first_stmt_dt1, +- tree *first_stmt_def0_type, +- tree *first_stmt_def1_type, +- tree *first_stmt_const_oprnd, +- int ncopies_for_cost, +- bool *pattern0, bool *pattern1) ++ int ncopies_for_cost, bool first, ++ VEC (slp_oprnd_info, heap) **oprnds_info) + { + tree oprnd; + unsigned int i, number_of_oprnds; +- tree def; ++ tree def, def_op0 = NULL_TREE; + gimple def_stmt; +- enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; +- stmt_vec_info stmt_info = +- vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)); +- enum gimple_rhs_class rhs_class; ++ enum vect_def_type dt = vect_uninitialized_def; ++ enum vect_def_type dt_op0 = vect_uninitialized_def; ++ stmt_vec_info stmt_info = vinfo_for_stmt (stmt); ++ tree lhs = gimple_get_lhs (stmt); + struct loop *loop = NULL; ++ enum tree_code rhs_code; ++ bool different_types = false; ++ bool pattern = false; ++ slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; ++ int op_idx = 1; ++ tree compare_rhs = NULL_TREE, rhs = NULL_TREE; ++ int cond_idx = -1; + + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); + +- rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt)); +- number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */ ++ if (is_gimple_call (stmt)) ++ number_of_oprnds = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ { ++ number_of_oprnds = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ { ++ number_of_oprnds = 4; ++ cond_idx = 0; ++ rhs = gimple_assign_rhs1 (stmt); ++ } ++ } ++ else ++ return false; + + for (i = 0; i < number_of_oprnds; i++) + { +- oprnd = gimple_op (stmt, i + 1); ++ if (compare_rhs) ++ oprnd = compare_rhs; ++ else ++ oprnd = 
gimple_op (stmt, op_idx++); + -+ alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr)); -+ mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0)); -+ /* Arrays have the same alignment as their type. */ -+ pi = get_ptr_info (ptr); -+ pi->align = TYPE_ALIGN_UNIT (type); -+ pi->misalign = 0; -+ return mem_ref; -+} ++ oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i); + - /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ ++ if (-1 < cond_idx && cond_idx < 4) ++ { ++ if (compare_rhs) ++ compare_rhs = NULL_TREE; ++ else ++ oprnd = TREE_OPERAND (rhs, cond_idx++); ++ } ++ ++ if (COMPARISON_CLASS_P (oprnd)) ++ { ++ compare_rhs = TREE_OPERAND (oprnd, 1); ++ oprnd = TREE_OPERAND (oprnd, 0); ++ } - /* Function vect_mark_relevant. -@@ -50,33 +126,72 @@ + if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def, +- &dt[i]) +- || (!def_stmt && dt[i] != vect_constant_def)) ++ &dt) ++ || (!def_stmt && dt != vect_constant_def)) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -149,34 +255,32 @@ + /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt + from the pattern. Check that all the stmts of the node are in the + pattern. 
*/ +- if (loop && def_stmt && gimple_bb (def_stmt) +- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ if (def_stmt && gimple_bb (def_stmt) ++ && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) ++ || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo) ++ && gimple_code (def_stmt) != GIMPLE_PHI)) + && vinfo_for_stmt (def_stmt) +- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))) ++ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) ++ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) ++ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) + { +- if (!*first_stmt_dt0) +- *pattern0 = true; +- else +- { +- if (i == 1 && !*first_stmt_dt1) +- *pattern1 = true; +- else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1)) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "Build SLP failed: some of the stmts" +- " are in a pattern, and others are not "); +- print_generic_expr (vect_dump, oprnd, TDF_SLIM); +- } ++ pattern = true; ++ if (!first && !oprnd_info->first_pattern) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "Build SLP failed: some of the stmts" ++ " are in a pattern, and others are not "); ++ print_generic_expr (vect_dump, oprnd, TDF_SLIM); ++ } + +- return false; +- } ++ return false; + } + + def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); +- dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); ++ dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); + +- if (*dt == vect_unknown_def_type) ++ if (dt == vect_unknown_def_type) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unsupported pattern."); +@@ -200,85 +304,125 @@ + } + } + +- if (!*first_stmt_dt0) ++ if (first) + { +- /* op0 of the first stmt of the group - store its info. 
*/ +- *first_stmt_dt0 = dt[i]; +- if (def) +- *first_stmt_def0_type = TREE_TYPE (def); +- else +- *first_stmt_const_oprnd = oprnd; ++ oprnd_info->first_dt = dt; ++ oprnd_info->first_pattern = pattern; ++ if (def) ++ { ++ oprnd_info->first_def_type = TREE_TYPE (def); ++ oprnd_info->first_const_oprnd = NULL_TREE; ++ } ++ else ++ { ++ oprnd_info->first_def_type = NULL_TREE; ++ oprnd_info->first_const_oprnd = oprnd; ++ } + +- /* Analyze costs (for the first stmt of the group only). */ +- if (rhs_class != GIMPLE_SINGLE_RHS) +- /* Not memory operation (we don't call this functions for loads). */ +- vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); +- else +- /* Store. */ +- vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node); ++ if (i == 0) ++ { ++ def_op0 = def; ++ dt_op0 = dt; ++ /* Analyze costs (for the first stmt of the group only). */ ++ if (REFERENCE_CLASS_P (lhs)) ++ /* Store. */ ++ vect_model_store_cost (stmt_info, ncopies_for_cost, false, ++ dt, slp_node); ++ else ++ /* Not memory operation (we don't call this function for ++ loads). */ ++ vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt, ++ slp_node); ++ } + } + + else + { +- if (!*first_stmt_dt1 && i == 1) ++ /* Not first stmt of the group, check that the def-stmt/s match ++ the def-stmt/s of the first stmt. Allow different definition ++ types for reduction chains: the first stmt must be a ++ vect_reduction_def (a phi node), and the rest ++ vect_internal_def. */ ++ if (((oprnd_info->first_dt != dt ++ && !(oprnd_info->first_dt == vect_reduction_def ++ && dt == vect_internal_def)) ++ || (oprnd_info->first_def_type != NULL_TREE ++ && def ++ && !types_compatible_p (oprnd_info->first_def_type, ++ TREE_TYPE (def)))) ++ || (!def ++ && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd), ++ TREE_TYPE (oprnd))) ++ || different_types) + { +- /* op1 of the first stmt of the group - store its info. 
*/ +- *first_stmt_dt1 = dt[i]; +- if (def) +- *first_stmt_def1_type = TREE_TYPE (def); +- else ++ if (number_of_oprnds != 2) + { +- /* We assume that the stmt contains only one constant +- operand. We fail otherwise, to be on the safe side. */ +- if (*first_stmt_const_oprnd) +- { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: two constant " +- "oprnds in stmt"); +- return false; +- } +- *first_stmt_const_oprnd = oprnd; +- } +- } +- else +- { +- /* Not first stmt of the group, check that the def-stmt/s match +- the def-stmt/s of the first stmt. */ +- if ((i == 0 +- && (*first_stmt_dt0 != dt[i] +- || (*first_stmt_def0_type && def +- && !types_compatible_p (*first_stmt_def0_type, +- TREE_TYPE (def))))) +- || (i == 1 +- && (*first_stmt_dt1 != dt[i] +- || (*first_stmt_def1_type && def +- && !types_compatible_p (*first_stmt_def1_type, +- TREE_TYPE (def))))) +- || (!def +- && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd), +- TREE_TYPE (oprnd)))) ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); ++ ++ return false; ++ } ++ ++ /* Try to swap operands in case of binary operation. 
*/ ++ if (i == 0) ++ different_types = true; ++ else + { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "Build SLP failed: different types "); ++ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ if (is_gimple_assign (stmt) ++ && (rhs_code = gimple_assign_rhs_code (stmt)) ++ && TREE_CODE_CLASS (rhs_code) == tcc_binary ++ && commutative_tree_code (rhs_code) ++ && oprnd0_info->first_dt == dt ++ && oprnd_info->first_dt == dt_op0 ++ && def_op0 && def ++ && !(oprnd0_info->first_def_type ++ && !types_compatible_p (oprnd0_info->first_def_type, ++ TREE_TYPE (def))) ++ && !(oprnd_info->first_def_type ++ && !types_compatible_p (oprnd_info->first_def_type, ++ TREE_TYPE (def_op0)))) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "Swapping operands of "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } + +- return false; ++ swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), ++ gimple_assign_rhs2_ptr (stmt)); ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: different types "); ++ ++ return false; ++ } + } + } + } + + /* Check the types of the definitions. 
*/ +- switch (dt[i]) ++ switch (dt) + { + case vect_constant_def: + case vect_external_def: ++ case vect_reduction_def: + break; + + case vect_internal_def: +- case vect_reduction_def: +- if (i == 0) +- VEC_safe_push (gimple, heap, *def_stmts0, def_stmt); ++ if (different_types) ++ { ++ oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0); ++ if (i == 0) ++ VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt); ++ else ++ VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt); ++ } + else +- VEC_safe_push (gimple, heap, *def_stmts1, def_stmt); ++ VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt); + break; + + default: +@@ -309,17 +453,13 @@ + int ncopies_for_cost, unsigned int *max_nunits, + VEC (int, heap) **load_permutation, + VEC (slp_tree, heap) **loads, +- unsigned int vectorization_factor) ++ unsigned int vectorization_factor, bool *loads_permuted) + { +- VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); +- VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); + unsigned int i; + VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); + gimple stmt = VEC_index (gimple, stmts, 0); +- enum vect_def_type first_stmt_dt0 = vect_uninitialized_def; +- enum vect_def_type first_stmt_dt1 = vect_uninitialized_def; + enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; +- tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE; ++ enum tree_code first_cond_code = ERROR_MARK; + tree lhs; + bool stop_recursion = false, need_same_oprnds = false; + tree vectype, scalar_type, first_op1 = NULL_TREE; +@@ -328,13 +468,28 @@ + int icode; + enum machine_mode optab_op2_mode; + enum machine_mode vec_mode; +- tree first_stmt_const_oprnd = NULL_TREE; + struct data_reference *first_dr; +- bool pattern0 = false, pattern1 = false; + HOST_WIDE_INT dummy; + bool permutation = false; + unsigned int load_place; + gimple first_load, 
prev_first_load = NULL; ++ VEC (slp_oprnd_info, heap) *oprnds_info; ++ unsigned int nops; ++ slp_oprnd_info oprnd_info; ++ tree cond; ++ ++ if (is_gimple_call (stmt)) ++ nops = gimple_call_num_args (stmt); ++ else if (is_gimple_assign (stmt)) ++ { ++ nops = gimple_num_ops (stmt) - 1; ++ if (gimple_assign_rhs_code (stmt) == COND_EXPR) ++ nops = 4; ++ } ++ else ++ return false; ++ ++ oprnds_info = vect_create_oprnd_info (nops, group_size); + + /* For every stmt in NODE find its def stmt/s. */ + FOR_EACH_VEC_ELT (gimple, stmts, i, stmt) +@@ -355,6 +510,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } - static void - vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, -- enum vect_relevant relevant, bool live_p) -+ enum vect_relevant relevant, bool live_p, -+ bool used_in_pattern) - { - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info); - bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); -+ gimple pattern_stmt; +@@ -364,13 +520,30 @@ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, +- "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL"); ++ "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } -+ /* If this stmt is an original stmt in a pattern, we might need to mark its -+ related pattern stmt instead of the original stmt. However, such stmts -+ may have their own uses that are not in any pattern, in such cases the -+ stmt itself should be marked. 
*/ - if (STMT_VINFO_IN_PATTERN_P (stmt_info)) - { -- gimple pattern_stmt; -+ bool found = false; -+ if (!used_in_pattern) ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == COND_EXPR ++ && (cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) ++ && !COMPARISON_CLASS_P (cond)) + { -+ imm_use_iterator imm_iter; -+ use_operand_p use_p; -+ gimple use_stmt; -+ tree lhs; -+ -+ if (is_gimple_assign (stmt)) -+ lhs = gimple_assign_lhs (stmt); -+ else -+ lhs = gimple_call_lhs (stmt); -+ -+ /* This use is out of pattern use, if LHS has other uses that are -+ pattern uses, we should mark the stmt itself, and not the pattern -+ stmt. */ -+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ if (vect_print_dump_info (REPORT_SLP)) + { -+ if (is_gimple_debug (USE_STMT (use_p))) -+ continue; -+ use_stmt = USE_STMT (use_p); -+ -+ if (vinfo_for_stmt (use_stmt) -+ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) -+ { -+ found = true; -+ break; -+ } ++ fprintf (vect_dump, ++ "Build SLP failed: condition is not comparison "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } ++ ++ vect_free_oprnd_info (&oprnds_info); ++ return false; + } + -+ if (!found) -+ { -+ /* This is the last stmt in a sequence that was detected as a -+ pattern that can potentially be vectorized. Don't mark the stmt -+ as relevant/live because it's not going to be vectorized. -+ Instead mark the pattern-stmt that replaces it. */ + scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); + vectype = get_vectype_for_scalar_type (scalar_type); + if (!vectype) +@@ -380,23 +553,20 @@ + fprintf (vect_dump, "Build SLP failed: unsupported data-type "); + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); + } + -+ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } -- /* This is the last stmt in a sequence that was detected as a -- pattern that can potentially be vectorized. 
Don't mark the stmt -- as relevant/live because it's not going to be vectorized. -- Instead mark the pattern-stmt that replaces it. */ -- -- pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); +- ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); +- if (ncopies != 1) ++ /* In case of multiple types we need to detect the smallest type. */ ++ if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) + { +- if (vect_print_dump_info (REPORT_SLP)) +- fprintf (vect_dump, "SLP with multiple types "); - -- if (vect_print_dump_info (REPORT_DETAILS)) -- fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); -- stmt_info = vinfo_for_stmt (pattern_stmt); -- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); -- save_relevant = STMT_VINFO_RELEVANT (stmt_info); -- save_live_p = STMT_VINFO_LIVE_P (stmt_info); -- stmt = pattern_stmt; -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "last stmt in pattern. don't mark" -+ " relevant/live."); -+ stmt_info = vinfo_for_stmt (pattern_stmt); -+ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); -+ save_relevant = STMT_VINFO_RELEVANT (stmt_info); -+ save_live_p = STMT_VINFO_LIVE_P (stmt_info); -+ stmt = pattern_stmt; -+ } - } - - STMT_VINFO_LIVE_P (stmt_info) |= live_p; -@@ -361,7 +476,8 @@ +- /* FORNOW: multiple types are unsupported in BB SLP. */ +- if (bb_vinfo) +- return false; ++ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ if (bb_vinfo) ++ vectorization_factor = *max_nunits; } - } -- vect_mark_relevant (worklist, def_stmt, relevant, live_p); -+ vect_mark_relevant (worklist, def_stmt, relevant, live_p, -+ is_pattern_stmt_p (stmt_vinfo)); - return true; - } +- /* In case of multiple types we need to detect the smallest type. 
*/ +- if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) +- *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); -@@ -418,7 +534,7 @@ + if (is_gimple_call (stmt)) + rhs_code = CALL_EXPR; +@@ -431,6 +601,7 @@ + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: no optab."); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + icode = (int) optab_handler (optab, vec_mode); +@@ -439,6 +610,7 @@ + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: " + "op not supported by target."); ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + optab_op2_mode = insn_data[icode].operand[2].mode; +@@ -449,6 +621,11 @@ + } + } } - - if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) -- vect_mark_relevant (&worklist, phi, relevant, live_p); -+ vect_mark_relevant (&worklist, phi, relevant, live_p, false); ++ else if (rhs_code == WIDEN_LSHIFT_EXPR) ++ { ++ need_same_oprnds = true; ++ first_op1 = gimple_assign_rhs2 (stmt); ++ } } - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + else { -@@ -430,7 +546,7 @@ +@@ -470,6 +647,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; } - if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) -- vect_mark_relevant (&worklist, stmt, relevant, live_p); -+ vect_mark_relevant (&worklist, stmt, relevant, live_p, false); +@@ -483,6 +661,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } } - } +@@ -494,15 +673,12 @@ + { + /* Store. 
*/ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, +- stmt, &def_stmts0, &def_stmts1, +- &first_stmt_dt0, +- &first_stmt_dt1, +- &first_stmt_def0_type, +- &first_stmt_def1_type, +- &first_stmt_const_oprnd, +- ncopies_for_cost, +- &pattern0, &pattern1)) +- return false; ++ stmt, ncopies_for_cost, ++ (i == 0), &oprnds_info)) ++ { ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } + else + { +@@ -520,12 +696,15 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } -@@ -529,15 +645,109 @@ - break; - } ++ vect_free_oprnd_info (&oprnds_info); + return false; + } -- FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) -- { -- tree op = USE_FROM_PTR (use_p); -- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist)) -- { -- VEC_free (gimple, heap, worklist); -- return false; -- } -- } -+ if (is_pattern_stmt_p (vinfo_for_stmt (stmt))) -+ { -+ /* Pattern statements are not inserted into the code, so -+ FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we -+ have to scan the RHS or function arguments instead. */ -+ if (is_gimple_assign (stmt)) + /* Check that the size of interleaved loads group is not + greater than the SLP group size. */ +- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size) ++ if (loop_vinfo ++ && DR_GROUP_SIZE (vinfo_for_stmt (stmt)) ++ > ncopies * group_size) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -535,6 +714,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + +@@ -555,6 +735,7 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + } +@@ -574,12 +755,13 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + + /* Analyze costs (for the first stmt in the group). 
*/ + vect_model_load_cost (vinfo_for_stmt (stmt), +- ncopies_for_cost, *node); ++ ncopies_for_cost, false, *node); + } + + /* Store the place of this load in the interleaving chain. In +@@ -601,7 +783,7 @@ + { + if (TREE_CODE_CLASS (rhs_code) == tcc_reference) + { +- /* Not strided load. */ ++ /* Not strided load. */ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: not strided load "); +@@ -609,12 +791,14 @@ + } + + /* FORNOW: Not strided loads are not supported. */ ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + + /* Not memory operation. */ + if (TREE_CODE_CLASS (rhs_code) != tcc_binary +- && TREE_CODE_CLASS (rhs_code) != tcc_unary) ++ && TREE_CODE_CLASS (rhs_code) != tcc_unary ++ && rhs_code != COND_EXPR) + { + if (vect_print_dump_info (REPORT_SLP)) + { +@@ -623,19 +807,38 @@ + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + ++ vect_free_oprnd_info (&oprnds_info); + return false; + } + ++ if (rhs_code == COND_EXPR) + { -+ tree rhs = gimple_assign_rhs1 (stmt); -+ unsigned int op_num; -+ tree op; -+ enum tree_code rhs_code; -+ switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) -+ { -+ case GIMPLE_SINGLE_RHS: -+ op = gimple_assign_rhs1 (stmt); -+ rhs_code = gimple_assign_rhs_code (stmt); -+ i = 0; -+ if (rhs_code == COND_EXPR -+ && COMPARISON_CLASS_P (TREE_OPERAND (op, 0))) -+ { -+ op = TREE_OPERAND (op, 0); -+ if (!process_use (stmt, TREE_OPERAND (op, 0), -+ loop_vinfo, -+ live_p, relevant, &worklist) -+ || !process_use (stmt, TREE_OPERAND (op, 1), -+ loop_vinfo, -+ live_p, relevant, &worklist)) -+ { -+ VEC_free (gimple, heap, worklist); -+ return false; -+ } -+ i = 1; -+ } -+ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); -+ for (i; i < op_num; i++) -+ { -+ op = TREE_OPERAND (rhs, i); -+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, -+ &worklist)) -+ { -+ VEC_free (gimple, heap, worklist); -+ return false; -+ } -+ } -+ break; -+ -+ case GIMPLE_BINARY_RHS: -+ op = 
gimple_assign_rhs1 (stmt); -+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, -+ &worklist)) -+ { -+ VEC_free (gimple, heap, worklist); -+ return false; -+ } -+ op = gimple_assign_rhs2 (stmt); -+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, -+ &worklist)) -+ { -+ VEC_free (gimple, heap, worklist); -+ return false; -+ } -+ break; -+ -+ case GIMPLE_UNARY_RHS: -+ op = gimple_assign_rhs1 (stmt); -+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, -+ &worklist)) -+ { -+ VEC_free (gimple, heap, worklist); -+ return false; -+ } ++ tree cond_expr = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); + -+ break; -+ -+ default: -+ return false; -+ } -+ } -+ else if (is_gimple_call (stmt)) -+ { -+ for (i = 0; i < gimple_call_num_args (stmt); i++) ++ if (i == 0) ++ first_cond_code = TREE_CODE (cond_expr); ++ else if (first_cond_code != TREE_CODE (cond_expr)) + { -+ tree arg = gimple_call_arg (stmt, i); -+ if (!process_use (stmt, arg, loop_vinfo, live_p, relevant, -+ &worklist)) ++ if (vect_print_dump_info (REPORT_SLP)) + { -+ VEC_free (gimple, heap, worklist); -+ return false; ++ fprintf (vect_dump, "Build SLP failed: different" ++ " operation"); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } -+ } ++ ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } -+ } -+ else -+ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) -+ { -+ tree op = USE_FROM_PTR (use_p); -+ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, -+ &worklist)) -+ { -+ VEC_free (gimple, heap, worklist); -+ return false; -+ } -+ } - } /* while worklist */ ++ + /* Find the def-stmts. 
*/ + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, +- &def_stmts0, &def_stmts1, +- &first_stmt_dt0, &first_stmt_dt1, +- &first_stmt_def0_type, +- &first_stmt_def1_type, +- &first_stmt_const_oprnd, +- ncopies_for_cost, +- &pattern0, &pattern1)) +- return false; ++ ncopies_for_cost, (i == 0), ++ &oprnds_info)) ++ { ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + } + } + +@@ -646,61 +849,55 @@ + /* Strided loads were reached - stop the recursion. */ + if (stop_recursion) + { ++ VEC_safe_push (slp_tree, heap, *loads, *node); + if (permutation) + { +- VEC_safe_push (slp_tree, heap, *loads, *node); ++ ++ *loads_permuted = true; + *inside_cost + += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) + * group_size; + } + else +- { +- /* We don't check here complex numbers chains, so we keep them in +- LOADS for further check in vect_supported_load_permutation_p. */ ++ { ++ /* We don't check here complex numbers chains, so we set ++ LOADS_PERMUTED for further check in ++ vect_supported_load_permutation_p. */ + if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR) +- VEC_safe_push (slp_tree, heap, *loads, *node); ++ *loads_permuted = true; + } + ++ vect_free_oprnd_info (&oprnds_info); + return true; + } + + /* Create SLP_TREE nodes for the definition node/s. 
*/ +- if (first_stmt_dt0 == vect_internal_def) ++ FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info) + { +- slp_tree left_node = XNEW (struct _slp_tree); +- SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0; +- SLP_TREE_VEC_STMTS (left_node) = NULL; +- SLP_TREE_LEFT (left_node) = NULL; +- SLP_TREE_RIGHT (left_node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0; +- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size, +- inside_cost, outside_cost, ncopies_for_cost, +- max_nunits, load_permutation, loads, +- vectorization_factor)) +- return false; ++ slp_tree child; + +- SLP_TREE_LEFT (*node) = left_node; +- } ++ if (oprnd_info->first_dt != vect_internal_def) ++ continue; + +- if (first_stmt_dt1 == vect_internal_def) +- { +- slp_tree right_node = XNEW (struct _slp_tree); +- SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1; +- SLP_TREE_VEC_STMTS (right_node) = NULL; +- SLP_TREE_LEFT (right_node) = NULL; +- SLP_TREE_RIGHT (right_node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0; +- if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size, ++ child = vect_create_new_slp_node (oprnd_info->def_stmts); ++ if (!child ++ || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads, +- vectorization_factor)) +- return false; ++ vectorization_factor, loads_permuted)) ++ { ++ if (child) ++ oprnd_info->def_stmts = NULL; ++ vect_free_slp_tree (child); ++ vect_free_oprnd_info (&oprnds_info); ++ return false; ++ } + +- SLP_TREE_RIGHT (*node) = right_node; ++ oprnd_info->def_stmts = NULL; ++ VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child); + } - VEC_free (gimple, heap, worklist); -@@ -648,7 +858,8 @@ ++ vect_free_oprnd_info (&oprnds_info); + return true; + } - void - vect_model_store_cost (stmt_vec_info stmt_info, int 
ncopies, -- enum vect_def_type dt, slp_tree slp_node) -+ bool store_lanes_p, enum vect_def_type dt, -+ slp_tree slp_node) +@@ -710,6 +907,7 @@ { - int group_size; - unsigned int inside_cost = 0, outside_cost = 0; -@@ -685,9 +896,11 @@ - first_dr = STMT_VINFO_DATA_REF (stmt_info); + int i; + gimple stmt; ++ slp_void_p child; + + if (!node) + return; +@@ -722,8 +920,8 @@ } + fprintf (vect_dump, "\n"); -- /* Is this an access in a group of stores, which provide strided access? -- If so, add in the cost of the permutes. */ -- if (group_size > 1) -+ /* We assume that the cost of a single store-lanes instruction is -+ equivalent to the cost of GROUP_SIZE separate stores. If a strided -+ access is instead being provided by a permute-and-store operation, -+ include the cost of the permutes. */ -+ if (!store_lanes_p && group_size > 1) - { - /* Uses a high and low interleave operation for each needed permute. */ - inside_cost = ncopies * exact_log2(group_size) * group_size -@@ -763,8 +976,8 @@ - access scheme chosen. */ +- vect_print_slp_tree (SLP_TREE_LEFT (node)); +- vect_print_slp_tree (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_print_slp_tree ((slp_tree) child); + } - void --vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) -- -+vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p, -+ slp_tree slp_node) + +@@ -737,6 +935,7 @@ { - int group_size; - gimple first_stmt; -@@ -789,9 +1002,11 @@ - first_dr = dr; - } + int i; + gimple stmt; ++ slp_void_p child; -- /* Is this an access in a group of loads providing strided access? -- If so, add in the cost of the permutes. */ -- if (group_size > 1) -+ /* We assume that the cost of a single load-lanes instruction is -+ equivalent to the cost of GROUP_SIZE separate loads. If a strided -+ access is instead being provided by a load-and-permute operation, -+ include the cost of the permutes. 
*/ -+ if (!load_lanes_p && group_size > 1) - { - /* Uses an even and odd extract operations for each needed permute. */ - inside_cost = ncopies * exact_log2(group_size) * group_size -@@ -1068,7 +1283,14 @@ + if (!node) + return; +@@ -745,8 +944,8 @@ + if (j < 0 || i == j) + STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; - /* Get the def from the vectorized stmt. */ - def_stmt_info = vinfo_for_stmt (def_stmt); -+ - vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); -+ /* Get vectorized pattern statement. */ -+ if (!vec_stmt -+ && STMT_VINFO_IN_PATTERN_P (def_stmt_info) -+ && !STMT_VINFO_RELEVANT (def_stmt_info)) -+ vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( -+ STMT_VINFO_RELATED_STMT (def_stmt_info))); - gcc_assert (vec_stmt); - if (gimple_code (vec_stmt) == GIMPLE_PHI) - vec_oprnd = PHI_RESULT (vec_stmt); -@@ -1217,16 +1439,35 @@ +- vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j); +- vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_mark_slp_stmts ((slp_tree) child, mark, j); } --/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not -- NULL. */ -+/* Get vectorized definitions for OP0 and OP1. -+ REDUC_INDEX is the index of reduction operand in case of reduction, -+ and -1 otherwise. */ +@@ -758,6 +957,7 @@ + int i; + gimple stmt; + stmt_vec_info stmt_info; ++ slp_void_p child; --static void -+void - vect_get_vec_defs (tree op0, tree op1, gimple stmt, -- VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1, -- slp_tree slp_node) -+ VEC (tree, heap) **vec_oprnds0, -+ VEC (tree, heap) **vec_oprnds1, -+ slp_tree slp_node, int reduc_index) - { - if (slp_node) -- vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1); -+ { -+ int nops = (op1 == NULL_TREE) ? 
1 : 2; -+ VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops); -+ VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops); -+ -+ VEC_quick_push (tree, ops, op0); -+ if (op1) -+ VEC_quick_push (tree, ops, op1); -+ -+ vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index); -+ -+ *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); -+ if (op1) -+ *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1); -+ -+ VEC_free (tree, heap, ops); -+ VEC_free (slp_void_p, heap, vec_defs); -+ } - else - { - tree vec_oprnd; -@@ -1324,6 +1565,7 @@ - VEC(tree, heap) *vargs = NULL; - enum { NARROW, NONE, WIDEN } modifier; - size_t i, nargs; -+ tree lhs; + if (!node) + return; +@@ -770,8 +970,8 @@ + STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; + } - /* FORNOW: unsupported in basic block SLP. */ - gcc_assert (loop_vinfo); -@@ -1461,7 +1703,7 @@ - /** Transform. **/ +- vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node)); +- vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_mark_slp_stmts_relevant ((slp_tree) child); + } - if (vect_print_dump_info (REPORT_DETAILS)) -- fprintf (vect_dump, "transform operation."); -+ fprintf (vect_dump, "transform call."); - /* Handle def. */ - scalar_dest = gimple_call_lhs (stmt); -@@ -1580,8 +1822,11 @@ - rhs of the statement with something harmless. 
*/ +@@ -844,12 +1044,13 @@ + gimple stmt; + VEC (gimple, heap) *tmp_stmts; + unsigned int index, i; ++ slp_void_p child; - type = TREE_TYPE (scalar_dest); -- new_stmt = gimple_build_assign (gimple_call_lhs (stmt), -- build_zero_cst (type)); -+ if (is_pattern_stmt_p (stmt_info)) -+ lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); -+ else -+ lhs = gimple_call_lhs (stmt); -+ new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); - set_vinfo_for_stmt (new_stmt, stmt_info); - /* For pattern statements make the related statement to point to - NEW_STMT in order to be able to retrieve the original statement -@@ -1810,7 +2055,8 @@ - for (j = 0; j < ncopies; j++) - { - if (j == 0) -- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); -+ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, -+ -1); - else - vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); + if (!node) + return; -@@ -2015,7 +2261,7 @@ - { - /* Handle uses. */ - if (j == 0) -- vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); -+ vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); - else - vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); +- vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation); +- vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation); -@@ -2048,6 +2294,42 @@ - } + gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))); + tmp_stmts = VEC_alloc (gimple, heap, group_size); +@@ -881,8 +1082,10 @@ + bool supported, bad_permutation = false; + sbitmap load_index; + slp_tree node, other_complex_node; +- gimple stmt, first = NULL, other_node_first; ++ gimple stmt, first = NULL, other_node_first, load, next_load, first_load; + unsigned complex_numbers = 0; ++ struct data_reference *dr; ++ bb_vec_info bb_vinfo; + 
/* FORNOW: permutations are only supported in SLP. */ + if (!slp_instn) +@@ -1042,6 +1245,76 @@ + } + } -+/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE -+ either as shift by a scalar or by a vector. */ -+ -+bool -+vect_supportable_shift (enum tree_code code, tree scalar_type) -+{ -+ -+ enum machine_mode vec_mode; -+ optab optab; -+ int icode; -+ tree vectype; ++ /* In basic block vectorization we allow any subchain of an interleaving ++ chain. ++ FORNOW: not supported in loop SLP because of realignment compications. */ ++ bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)); ++ bad_permutation = false; ++ /* Check that for every node in the instance teh loads form a subchain. */ ++ if (bb_vinfo) ++ { ++ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ { ++ next_load = NULL; ++ first_load = NULL; ++ FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load) ++ { ++ if (!first_load) ++ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load)); ++ else if (first_load ++ != DR_GROUP_FIRST_DR (vinfo_for_stmt (load))) ++ { ++ bad_permutation = true; ++ break; ++ } + -+ vectype = get_vectype_for_scalar_type (scalar_type); -+ if (!vectype) -+ return false; ++ if (j != 0 && next_load != load) ++ { ++ bad_permutation = true; ++ break; ++ } + -+ optab = optab_for_tree_code (code, vectype, optab_scalar); -+ if (!optab -+ || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) -+ { -+ optab = optab_for_tree_code (code, vectype, optab_vector); -+ if (!optab -+ || (optab_handler (optab, TYPE_MODE (vectype)) -+ == CODE_FOR_nothing)) -+ return false; -+ } ++ next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load)); ++ } + -+ vec_mode = TYPE_MODE (vectype); -+ icode = (int) optab_handler (optab, vec_mode); -+ if (icode == CODE_FOR_nothing) -+ return false; ++ if (bad_permutation) ++ break; ++ } + -+ return true; -+} ++ /* Check that the alignment of the first load in every subchain, i.e., ++ the first statement in every 
load node, is supported. */ ++ if (!bad_permutation) ++ { ++ FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node) ++ { ++ first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); ++ if (first_load ++ != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load))) ++ { ++ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load)); ++ if (vect_supportable_dr_alignment (dr, false) ++ == dr_unaligned_unsupported) ++ { ++ if (vect_print_dump_info (REPORT_SLP)) ++ { ++ fprintf (vect_dump, "unsupported unaligned load "); ++ print_gimple_stmt (vect_dump, first_load, 0, ++ TDF_SLIM); ++ } ++ bad_permutation = true; ++ break; ++ } ++ } ++ } + ++ if (!bad_permutation) ++ { ++ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); ++ return true; ++ } ++ } ++ } + - /* Function vectorizable_shift. + /* FORNOW: the only supported permutation is 0..01..1.. of length equal to + GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as + well (unless it's reduction). */ +@@ -1140,7 +1413,7 @@ + gimple stmt) + { + slp_instance new_instance; +- slp_tree node = XNEW (struct _slp_tree); ++ slp_tree node; + unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); + unsigned int unrolling_factor = 1, nunits; + tree vectype, scalar_type = NULL_TREE; +@@ -1151,6 +1424,8 @@ + VEC (int, heap) *load_permutation; + VEC (slp_tree, heap) *loads; + struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); ++ bool loads_permuted = false; ++ VEC (gimple, heap) *scalar_stmts; - Check if STMT performs a shift operation that can be vectorized. -@@ -2334,10 +2616,10 @@ - operand 1 should be of a vector type (the usual case). 
*/ - if (vec_oprnd1) - vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, -- slp_node); -+ slp_node, -1); - else - vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, -- slp_node); -+ slp_node, -1); - } - else - vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); -@@ -2645,10 +2927,10 @@ - { - if (op_type == binary_op || op_type == ternary_op) - vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, -- slp_node); -+ slp_node, -1); - else - vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, -- slp_node); -+ slp_node, -1); - if (op_type == ternary_op) - { - vec_oprnds2 = VEC_alloc (tree, heap, 1); -@@ -2839,11 +3121,9 @@ - VEC (tree, heap) *vec_oprnds0 = NULL; - VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; - tree last_oprnd, intermediate_type; -+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + if (dr) + { +@@ -1180,7 +1455,6 @@ + if (loop_vinfo) + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + else +- /* No multitypes in BB SLP. */ + vectorization_factor = nunits; -- /* FORNOW: not supported by basic block SLP vectorization. */ -- gcc_assert (loop_vinfo); -- -- if (!STMT_VINFO_RELEVANT_P (stmt_info)) -+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) - return false; + /* Calculate the unrolling factor. */ +@@ -1195,39 +1469,31 @@ + } - if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) -@@ -2871,7 +3151,7 @@ - && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) - && CONVERT_EXPR_CODE_P (code)))) - return false; -- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, -+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, - &def_stmt, &def, &dt[0], &vectype_in)) - { - if (vect_print_dump_info (REPORT_DETAILS)) -@@ -2962,7 +3242,8 @@ + /* Create a node (a root of the SLP tree) for the packed strided stores. 
*/ +- SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size); ++ scalar_stmts = VEC_alloc (gimple, heap, group_size); + next = stmt; + if (dr) { - /* Handle uses. */ - if (slp_node) -- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1); -+ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, -+ slp_node, -1); - else + /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ + while (next) { - VEC_free (tree, heap, vec_oprnds0); -@@ -3118,11 +3399,10 @@ - int multi_step_cvt = 0; - VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; - VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; -+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); -+ unsigned int k; - -- /* FORNOW: not supported by basic block SLP vectorization. */ -- gcc_assert (loop_vinfo); -- -- if (!STMT_VINFO_RELEVANT_P (stmt_info)) -+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) - return false; - - if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) -@@ -3137,7 +3417,8 @@ +- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); ++ if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) ++ && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) ++ VEC_safe_push (gimple, heap, scalar_stmts, ++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); ++ else ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + } + } + else + { + /* Collect reduction statements. 
*/ +- for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, +- next); +- i++) +- { +- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next); +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "pushing reduction into node: "); +- print_gimple_stmt (vect_dump, next, 0, TDF_SLIM); +- } +- } ++ VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo); ++ for (i = 0; VEC_iterate (gimple, reductions, i, next); i++) ++ VEC_safe_push (gimple, heap, scalar_stmts, next); + } - code = gimple_assign_rhs_code (stmt); - if (!CONVERT_EXPR_CODE_P (code) -- && code != WIDEN_MULT_EXPR) -+ && code != WIDEN_MULT_EXPR -+ && code != WIDEN_LSHIFT_EXPR) - return false; +- SLP_TREE_VEC_STMTS (node) = NULL; +- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; +- SLP_TREE_LEFT (node) = NULL; +- SLP_TREE_RIGHT (node) = NULL; +- SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; +- SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; ++ node = vect_create_new_slp_node (scalar_stmts); - scalar_dest = gimple_assign_lhs (stmt); -@@ -3151,13 +3432,40 @@ - && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) - && CONVERT_EXPR_CODE_P (code)))) - return false; -- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, -+ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, - &def_stmt, &def, &dt[0], &vectype_in)) + /* Calculate the number of vector stmts to create based on the unrolling + factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is +@@ -1241,25 +1507,33 @@ + if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, + &inside_cost, &outside_cost, ncopies_for_cost, + &max_nunits, &load_permutation, &loads, +- vectorization_factor)) ++ vectorization_factor, &loads_permuted)) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "use not simple."); - return false; - } -+ -+ op_type = TREE_CODE_LENGTH (code); -+ if (op_type == binary_op) -+ { -+ bool ok; -+ -+ op1 = gimple_assign_rhs2 (stmt); -+ if (code == WIDEN_MULT_EXPR || code == 
WIDEN_LSHIFT_EXPR) +- /* Create a new SLP instance. */ +- new_instance = XNEW (struct _slp_instance); +- SLP_INSTANCE_TREE (new_instance) = node; +- SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; +- /* Calculate the unrolling factor based on the smallest type in the +- loop. */ ++ /* Calculate the unrolling factor based on the smallest type. */ + if (max_nunits > nunits) + unrolling_factor = least_common_multiple (max_nunits, group_size) + / group_size; + ++ if (unrolling_factor != 1 && !loop_vinfo) + { -+ /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of -+ OP1. */ -+ if (CONSTANT_CLASS_P (op0)) -+ ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, -+ &def_stmt, &def, &dt[1], &vectype_in); -+ else -+ ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, -+ &def, &dt[1]); ++ if (vect_print_dump_info (REPORT_SLP)) ++ fprintf (vect_dump, "Build SLP failed: unrolling required in basic" ++ " block SLP"); ++ return false; ++ } + -+ if (!ok) -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "use not simple."); -+ return false; -+ } -+ } -+ } ++ /* Create a new SLP instance. */ ++ new_instance = XNEW (struct _slp_instance); ++ SLP_INSTANCE_TREE (new_instance) = node; ++ SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; + SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; + SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; + SLP_INSTANCE_LOADS (new_instance) = loads; + SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; + SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; +- if (VEC_length (slp_tree, loads)) + - /* If op0 is an external or constant def use a vector type with - the same size as the output vector type. 
*/ - if (!vectype_in) -@@ -3190,18 +3498,6 @@ ++ if (loads_permuted) + { + if (!vect_supported_load_permutation_p (new_instance, group_size, + load_permutation)) +@@ -1396,6 +1670,7 @@ + imm_use_iterator imm_iter; + gimple use_stmt; + stmt_vec_info stmt_vinfo; ++ slp_void_p child; - gcc_assert (ncopies >= 1); + if (!node) + return; +@@ -1413,8 +1688,8 @@ + == vect_reduction_def)) + vect_mark_slp_stmts (node, hybrid, i); -- op_type = TREE_CODE_LENGTH (code); -- if (op_type == binary_op) -- { -- op1 = gimple_assign_rhs2 (stmt); -- if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1])) -- { -- if (vect_print_dump_info (REPORT_DETAILS)) -- fprintf (vect_dump, "use not simple."); -- return false; -- } -- } -- - /* Supportable by target? */ - if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in, - &decl1, &decl2, &code1, &code2, -@@ -3227,6 +3523,14 @@ - fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", - ncopies); +- vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); +- vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_detect_hybrid_slp_stmts ((slp_tree) child); + } -+ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) -+ { -+ if (CONSTANT_CLASS_P (op0)) -+ op0 = fold_convert (TREE_TYPE (op1), op0); -+ else if (CONSTANT_CLASS_P (op1)) -+ op1 = fold_convert (TREE_TYPE (op0), op1); -+ } -+ - /* Handle def. */ - /* In case of multi-step promotion, we first generate promotion operations - to the intermediate types, and then from that types to the final one. 
-@@ -3260,6 +3564,8 @@ - if (op_type == binary_op) - vec_oprnds1 = VEC_alloc (tree, heap, 1); - } -+ else if (code == WIDEN_LSHIFT_EXPR) -+ vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); - /* In case the vectorization factor (VF) is bigger than the number - of elements that we can fit in a vectype (nunits), we have to generate -@@ -3273,15 +3579,33 @@ - if (j == 0) - { - if (slp_node) -- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, -- &vec_oprnds1, -1); -- else -+ { -+ if (code == WIDEN_LSHIFT_EXPR) -+ { -+ vec_oprnd1 = op1; -+ /* Store vec_oprnd1 for every vector stmt to be created -+ for SLP_NODE. We check during the analysis that all -+ the shift arguments are the same. */ -+ for (k = 0; k < slp_node->vec_stmts_size - 1; k++) -+ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); -+ -+ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, -+ slp_node, -1); -+ } -+ else -+ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, -+ &vec_oprnds1, slp_node, -1); -+ } -+ else - { - vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); - VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); - if (op_type == binary_op) - { -- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); -+ if (code == WIDEN_LSHIFT_EXPR) -+ vec_oprnd1 = op1; -+ else -+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); - VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); - } - } -@@ -3292,7 +3616,10 @@ - VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0); - if (op_type == binary_op) - { -- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); -+ if (code == WIDEN_LSHIFT_EXPR) -+ vec_oprnd1 = op1; -+ else -+ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); - VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1); - } - } -@@ -3337,6 +3664,7 @@ - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; - tree vectype = STMT_VINFO_VECTYPE (stmt_info); -+ tree elem_type; - 
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = NULL; - enum machine_mode vec_mode; -@@ -3352,6 +3680,7 @@ - int j; - gimple next_stmt, first_stmt = NULL; - bool strided_store = false; -+ bool store_lanes_p = false; - unsigned int group_size, i; - VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; - bool inv_p; -@@ -3359,6 +3688,7 @@ - bool slp = (slp_node != NULL); - unsigned int vec_num; - bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); -+ tree aggr_type; +@@ -1504,13 +1779,14 @@ + bool dummy; + int i; + gimple stmt; ++ slp_void_p child; - if (loop_vinfo) - loop = LOOP_VINFO_LOOP (loop_vinfo); -@@ -3412,7 +3742,8 @@ + if (!node) + return true; - /* The scalar rhs type needs to be trivially convertible to the vector - component type. This should always be the case. */ -- if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op))) -+ elem_type = TREE_TYPE (vectype); -+ if (!useless_type_conversion_p (elem_type, TREE_TYPE (op))) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "??? 
operands of different types"); -@@ -3439,9 +3770,14 @@ - { - strided_store = true; - first_stmt = DR_GROUP_FIRST_DR (stmt_info); -- if (!vect_strided_store_supported (vectype) -- && !PURE_SLP_STMT (stmt_info) && !slp) -- return false; -+ if (!slp && !PURE_SLP_STMT (stmt_info)) -+ { -+ group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); -+ if (vect_store_lanes_supported (vectype, group_size)) -+ store_lanes_p = true; -+ else if (!vect_strided_store_supported (vectype, group_size)) -+ return false; -+ } +- if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node)) +- || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node))) +- return false; ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child)) ++ return false; - if (first_stmt == stmt) - { -@@ -3467,7 +3803,7 @@ - if (!vec_stmt) /* transformation not required. */ + FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt) { - STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; -- vect_model_store_cost (stmt_info, ncopies, dt, NULL); -+ vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL); - return true; - } - -@@ -3501,6 +3837,7 @@ - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); - first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); -+ op = gimple_assign_rhs1 (first_stmt); - } - else - /* VEC_NUM is the number of vect stmts to be created for this -@@ -3522,6 +3859,16 @@ +@@ -1661,42 +1937,18 @@ - alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); - gcc_assert (alignment_support_scheme); -+ /* Targets with store-lane instructions must not require explicit -+ realignment. 
*/ -+ gcc_assert (!store_lanes_p -+ || alignment_support_scheme == dr_aligned -+ || alignment_support_scheme == dr_unaligned_supported); -+ -+ if (store_lanes_p) -+ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); -+ else -+ aggr_type = vectype; + /* Check if the basic block can be vectorized. */ - /* In case the vectorization factor (VF) is bigger than the number - of elements that we can fit in a vectype (nunits), we have to generate -@@ -3573,8 +3920,8 @@ - if (slp) - { - /* Get vectorized arguments for SLP_NODE. */ -- vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds, -- NULL, -1); -+ vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, -+ NULL, slp_node, -1); +-bb_vec_info +-vect_slp_analyze_bb (basic_block bb) ++static bb_vec_info ++vect_slp_analyze_bb_1 (basic_block bb) + { + bb_vec_info bb_vinfo; + VEC (ddr_p, heap) *ddrs; + VEC (slp_instance, heap) *slp_instances; + slp_instance instance; +- int i, insns = 0; +- gimple_stmt_iterator gsi; ++ int i; + int min_vf = 2; + int max_vf = MAX_VECTORIZATION_FACTOR; + bool data_dependence_in_bb = false; - vec_oprnd = VEC_index (tree, vec_oprnds, 0); - } -@@ -3610,9 +3957,9 @@ - /* We should have catched mismatched types earlier. */ - gcc_assert (useless_type_conversion_p (vectype, - TREE_TYPE (vec_oprnd))); -- dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE, -- &dummy, &ptr_incr, false, -- &inv_p); -+ dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL, -+ NULL_TREE, &dummy, -+ &ptr_incr, false, &inv_p); - gcc_assert (bb_vinfo || !inv_p); - } - else -@@ -3633,76 +3980,101 @@ - VEC_replace(tree, dr_chain, i, vec_oprnd); - VEC_replace(tree, oprnds, i, vec_oprnd); - } -- dataref_ptr = -- bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); -- } +- current_vector_size = 0; - -- if (strided_store) -- { -- result_chain = VEC_alloc (tree, heap, group_size); -- /* Permute. 
*/ -- if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi, -- &result_chain)) -- return false; -+ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, -+ TYPE_SIZE_UNIT (aggr_type)); - } +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); +- +- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- gimple stmt = gsi_stmt (gsi); +- if (!is_gimple_debug (stmt) +- && !gimple_nop_p (stmt) +- && gimple_code (stmt) != GIMPLE_LABEL) +- insns++; +- } +- +- if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) +- { +- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +- fprintf (vect_dump, "not vectorized: too many instructions in basic " +- "block.\n"); +- +- return NULL; +- } +- + bb_vinfo = new_bb_vec_info (bb); + if (!bb_vinfo) + return NULL; +@@ -1722,6 +1974,8 @@ + return NULL; + } -- next_stmt = first_stmt; -- for (i = 0; i < vec_num; i++) -+ if (store_lanes_p) - { -- struct ptr_info_def *pi; -+ tree vec_array; ++ vect_pattern_recog (NULL, bb_vinfo); ++ + if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, + &data_dependence_in_bb) + || min_vf > max_vf +@@ -1756,16 +2010,6 @@ + return NULL; + } -- if (i > 0) -- /* Bump the vector pointer. */ -- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, -- NULL_TREE); +- if (!vect_verify_datarefs_alignment (NULL, bb_vinfo)) +- { +- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +- fprintf (vect_dump, "not vectorized: unsupported alignment in basic " +- "block.\n"); - -- if (slp) -- vec_oprnd = VEC_index (tree, vec_oprnds, i); -- else if (strided_store) -- /* For strided stores vectorized defs are interleaved in -- vect_permute_store_chain(). 
*/ -- vec_oprnd = VEC_index (tree, result_chain, i); +- destroy_bb_vec_info (bb_vinfo); +- return NULL; +- } - -- data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, -- build_int_cst (reference_alias_ptr_type -- (DR_REF (first_dr)), 0)); -- pi = get_ptr_info (dataref_ptr); -- pi->align = TYPE_ALIGN_UNIT (vectype); -- if (aligned_access_p (first_dr)) -- pi->misalign = 0; -- else if (DR_MISALIGNMENT (first_dr) == -1) -+ /* Combine all the vectors into an array. */ -+ vec_array = create_vector_array (vectype, vec_num); -+ for (i = 0; i < vec_num; i++) - { -- TREE_TYPE (data_ref) -- = build_aligned_type (TREE_TYPE (data_ref), -- TYPE_ALIGN (TREE_TYPE (vectype))); -- pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); -- pi->misalign = 0; -- } -- else -- { -- TREE_TYPE (data_ref) -- = build_aligned_type (TREE_TYPE (data_ref), -- TYPE_ALIGN (TREE_TYPE (vectype))); -- pi->misalign = DR_MISALIGNMENT (first_dr); -+ vec_oprnd = VEC_index (tree, dr_chain, i); -+ write_vector_array (stmt, gsi, vec_oprnd, vec_array, i); - } + /* Check the SLP opportunities in the basic block, analyze and build SLP + trees. */ + if (!vect_analyze_slp (NULL, bb_vinfo)) +@@ -1788,6 +2032,16 @@ + vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance)); + } -- /* Arguments are ready. Create the new vector stmt. */ -- new_stmt = gimple_build_assign (data_ref, vec_oprnd); -+ /* Emit: -+ MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ -+ data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); -+ new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); -+ gimple_call_set_lhs (new_stmt, data_ref); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - mark_symbols_for_renaming (new_stmt); -+ } -+ else -+ { -+ new_stmt = NULL; -+ if (strided_store) -+ { -+ result_chain = VEC_alloc (tree, heap, group_size); -+ /* Permute. 
*/ -+ vect_permute_store_chain (dr_chain, group_size, stmt, gsi, -+ &result_chain); -+ } ++ if (!vect_verify_datarefs_alignment (NULL, bb_vinfo)) ++ { ++ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) ++ fprintf (vect_dump, "not vectorized: unsupported alignment in basic " ++ "block.\n"); ++ ++ destroy_bb_vec_info (bb_vinfo); ++ return NULL; ++ } ++ + if (!vect_slp_analyze_operations (bb_vinfo)) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) +@@ -1816,6 +2070,61 @@ + } -- if (slp) -- continue; -+ next_stmt = first_stmt; -+ for (i = 0; i < vec_num; i++) -+ { -+ struct ptr_info_def *pi; + ++bb_vec_info ++vect_slp_analyze_bb (basic_block bb) ++{ ++ bb_vec_info bb_vinfo; ++ int insns = 0; ++ gimple_stmt_iterator gsi; ++ unsigned int vector_sizes; + -+ if (i > 0) -+ /* Bump the vector pointer. */ -+ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, -+ stmt, NULL_TREE); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); + -+ if (slp) -+ vec_oprnd = VEC_index (tree, vec_oprnds, i); -+ else if (strided_store) -+ /* For strided stores vectorized defs are interleaved in -+ vect_permute_store_chain(). 
*/ -+ vec_oprnd = VEC_index (tree, result_chain, i); ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple stmt = gsi_stmt (gsi); ++ if (!is_gimple_debug (stmt) ++ && !gimple_nop_p (stmt) ++ && gimple_code (stmt) != GIMPLE_LABEL) ++ insns++; ++ } + -+ data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, -+ build_int_cst (reference_alias_ptr_type -+ (DR_REF (first_dr)), 0)); -+ pi = get_ptr_info (dataref_ptr); -+ pi->align = TYPE_ALIGN_UNIT (vectype); -+ if (aligned_access_p (first_dr)) -+ pi->misalign = 0; -+ else if (DR_MISALIGNMENT (first_dr) == -1) ++ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) ++ { ++ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) ++ fprintf (vect_dump, "not vectorized: too many instructions in basic " ++ "block.\n"); ++ ++ return NULL; ++ } ++ ++ /* Autodetect first vector size we try. */ ++ current_vector_size = 0; ++ vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); ++ ++ while (1) ++ { ++ bb_vinfo = vect_slp_analyze_bb_1 (bb); ++ if (bb_vinfo) ++ return bb_vinfo; ++ ++ destroy_bb_vec_info (bb_vinfo); ++ ++ vector_sizes &= ~current_vector_size; ++ if (vector_sizes == 0 ++ || current_vector_size == 0) ++ return NULL; ++ ++ /* Try the next biggest vector size. */ ++ current_vector_size = 1 << floor_log2 (vector_sizes); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "***** Re-trying analysis with " ++ "vector size %d\n", current_vector_size); ++ } ++} ++ ++ + /* SLP costs are calculated according to SLP instance unrolling factor (i.e., + the number of created vector stmts depends on the unrolling factor). + However, the actual number of vector stmts for every SLP node depends on +@@ -1939,15 +2248,15 @@ + + For example, we have two scalar operands, s1 and s2 (e.g., group of + strided accesses of size two), while NUNITS is four (i.e., four scalars +- of this type can be packed in a vector). 
The output vector will contain +- two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES ++ of this type can be packed in a vector). The output vector will contain ++ two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES + will be 2). + + If GROUP_SIZE > NUNITS, the scalars will be split into several vectors + containing the operands. + + For example, NUNITS is four as before, and the group size is 8 +- (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and ++ (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and + {s5, s6, s7, s8}. */ + + number_of_copies = least_common_multiple (nunits, group_size) / group_size; +@@ -1959,8 +2268,18 @@ + { + if (is_store) + op = gimple_assign_rhs1 (stmt); +- else ++ else if (gimple_assign_rhs_code (stmt) != COND_EXPR) + op = gimple_op (stmt, op_num + 1); ++ else ++ { ++ if (op_num == 0 || op_num == 1) + { -+ TREE_TYPE (data_ref) -+ = build_aligned_type (TREE_TYPE (data_ref), -+ TYPE_ALIGN (elem_type)); -+ pi->align = TYPE_ALIGN_UNIT (elem_type); -+ pi->misalign = 0; ++ tree cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); ++ op = TREE_OPERAND (cond, op_num); + } + else -+ { -+ TREE_TYPE (data_ref) -+ = build_aligned_type (TREE_TYPE (data_ref), -+ TYPE_ALIGN (elem_type)); -+ pi->misalign = DR_MISALIGNMENT (first_dr); -+ } -+ -+ /* Arguments are ready. Create the new vector stmt. */ -+ new_stmt = gimple_build_assign (data_ref, vec_oprnd); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ mark_symbols_for_renaming (new_stmt); ++ op = TREE_OPERAND (gimple_assign_rhs1 (stmt), op_num - 1); ++ } + + if (reduc_index != -1) + { +@@ -2055,88 +2374,102 @@ + If the scalar definitions are loop invariants or constants, collect them and + call vect_get_constant_vectors() to create vector stmts. 
+ Otherwise, the def-stmts must be already vectorized and the vectorized stmts +- must be stored in the LEFT/RIGHT node of SLP_NODE, and we call +- vect_get_slp_vect_defs() to retrieve them. +- If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from +- the right node. This is used when the second operand must remain scalar. */ ++ must be stored in the corresponding child of SLP_NODE, and we call ++ vect_get_slp_vect_defs () to retrieve them. */ + + void +-vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node, +- VEC (tree,heap) **vec_oprnds0, +- VEC (tree,heap) **vec_oprnds1, int reduc_index) +-{ +- gimple first_stmt; +- enum tree_code code; +- int number_of_vects; ++vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node, ++ VEC (slp_void_p, heap) **vec_oprnds, int reduc_index) ++{ ++ gimple first_stmt, first_def; ++ int number_of_vects = 0, i; ++ unsigned int child_index = 0; + HOST_WIDE_INT lhs_size_unit, rhs_size_unit; ++ slp_tree child = NULL; ++ VEC (tree, heap) *vec_defs; ++ tree oprnd, def_lhs; ++ bool vectorized_defs; + + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); +- /* The number of vector defs is determined by the number of vector statements +- in the node from which we get those statements. */ +- if (SLP_TREE_LEFT (slp_node)) +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node)); +- else +- { +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- /* Number of vector stmts was calculated according to LHS in +- vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if +- necessary. See vect_get_smallest_scalar_type () for details. 
*/ +- vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, +- &rhs_size_unit); +- if (rhs_size_unit != lhs_size_unit) +- { +- number_of_vects *= rhs_size_unit; +- number_of_vects /= lhs_size_unit; +- } +- } ++ FOR_EACH_VEC_ELT (tree, ops, i, oprnd) ++ { ++ /* For each operand we check if it has vectorized definitions in a child ++ node or we need to create them (for invariants and constants). We ++ check if the LHS of the first stmt of the next child matches OPRND. ++ If it does, we found the correct child. Otherwise, we call ++ vect_get_constant_vectors (), and not advance CHILD_INDEX in order ++ to check this child node for the next operand. */ ++ vectorized_defs = false; ++ if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index) ++ { ++ child = (slp_tree) VEC_index (slp_void_p, ++ SLP_TREE_CHILDREN (slp_node), ++ child_index); ++ first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0); + -+ if (slp) -+ continue; ++ /* In the end of a pattern sequence we have a use of the original stmt, ++ so we need to compare OPRND with the original def. */ ++ if (is_pattern_stmt_p (vinfo_for_stmt (first_def)) ++ && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt)) ++ && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt))) ++ first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); -- if (j == 0) -- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; -+ next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); -+ if (!next_stmt) -+ break; -+ } -+ } -+ if (!slp) -+ { -+ if (j == 0) -+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; - else - STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; +- /* Allocate memory for vectorized defs. 
*/ +- *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); - - prev_stmt_info = vinfo_for_stmt (new_stmt); -- next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); -- if (!next_stmt) -- break; - } - } +- /* SLP_NODE corresponds either to a group of stores or to a group of +- unary/binary operations. We don't call this function for loads. +- For reduction defs we call vect_get_constant_vectors(), since we are +- looking for initial loop invariant values. */ +- if (SLP_TREE_LEFT (slp_node) && reduc_index == -1) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); +- else +- /* Build vectors from scalar defs. */ +- vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects, +- reduc_index); +- +- if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) +- /* Since we don't call this function with loads, this is a group of +- stores. */ +- return; +- +- /* For reductions, we only need initial values. */ +- if (reduc_index != -1) +- return; ++ if (is_gimple_call (first_def)) ++ def_lhs = gimple_call_lhs (first_def); ++ else ++ def_lhs = gimple_assign_lhs (first_def); -@@ -3813,6 +4185,7 @@ - bool nested_in_vect_loop = false; - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; - tree vectype = STMT_VINFO_VECTYPE (stmt_info); -+ tree elem_type; - tree new_temp; - enum machine_mode mode; - gimple new_stmt = NULL; -@@ -3829,6 +4202,7 @@ - gimple phi = NULL; - VEC(tree,heap) *dr_chain = NULL; - bool strided_load = false; -+ bool load_lanes_p = false; - gimple first_stmt; - tree scalar_type; - bool inv_p; -@@ -3841,6 +4215,7 @@ - enum tree_code code; - bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); - int vf; -+ tree aggr_type; +- code = gimple_assign_rhs_code (first_stmt); +- if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1) +- return; ++ if (operand_equal_p (oprnd, def_lhs, 0)) ++ { ++ /* The number of vector defs is determined by the number of ++ 
vector statements in the node from which we get those ++ statements. */ ++ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); ++ vectorized_defs = true; ++ child_index++; ++ } ++ } - if (loop_vinfo) - { -@@ -3917,7 +4292,8 @@ +- /* The number of vector defs is determined by the number of vector statements +- in the node from which we get those statements. */ +- if (SLP_TREE_RIGHT (slp_node)) +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node)); +- else +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++ if (!vectorized_defs) ++ { ++ if (i == 0) ++ { ++ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++ /* Number of vector stmts was calculated according to LHS in ++ vect_schedule_slp_instance (), fix it by replacing LHS with ++ RHS, if necessary. See vect_get_smallest_scalar_type () for ++ details. */ ++ vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, ++ &rhs_size_unit); ++ if (rhs_size_unit != lhs_size_unit) ++ { ++ number_of_vects *= rhs_size_unit; ++ number_of_vects /= lhs_size_unit; ++ } ++ } ++ } - /* The vector component type needs to be trivially convertible to the - scalar lhs. This should always be the case. */ -- if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype))) -+ elem_type = TREE_TYPE (vectype); -+ if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "??? operands of different types"); -@@ -3931,10 +4307,15 @@ - /* FORNOW */ - gcc_assert (! nested_in_vect_loop); +- *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects); ++ /* Allocate memory for vectorized defs. */ ++ vec_defs = VEC_alloc (tree, heap, number_of_vects); -- /* Check if interleaving is supported. 
*/ -- if (!vect_strided_load_supported (vectype) -- && !PURE_SLP_STMT (stmt_info) && !slp) -- return false; -+ first_stmt = DR_GROUP_FIRST_DR (stmt_info); -+ if (!slp && !PURE_SLP_STMT (stmt_info)) -+ { -+ group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); -+ if (vect_load_lanes_supported (vectype, group_size)) -+ load_lanes_p = true; -+ else if (!vect_strided_load_supported (vectype, group_size)) -+ return false; -+ } - } +- if (SLP_TREE_RIGHT (slp_node)) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); +- else +- /* Build vectors from scalar defs. */ +- vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects, +- -1); ++ /* For reduction defs we call vect_get_constant_vectors (), since we are ++ looking for initial loop invariant values. */ ++ if (vectorized_defs && reduc_index == -1) ++ /* The defs are already vectorized. */ ++ vect_get_slp_vect_defs (child, &vec_defs); ++ else ++ /* Build vectors from scalar defs. */ ++ vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i, ++ number_of_vects, reduc_index); ++ ++ VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs); ++ ++ /* For reductions, we only need initial values. */ ++ if (reduc_index != -1) ++ return; ++ } + } - if (negative) -@@ -3959,18 +4340,23 @@ - if (!vec_stmt) /* transformation not required. 
*/ - { - STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; -- vect_model_load_cost (stmt_info, ncopies, NULL); -+ vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL); - return true; - } +- + /* Create NCOPIES permutation statements using the mask MASK_BYTES (by + building a vector of type MASK_TYPE from it) and two input vectors placed in + DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and +@@ -2453,14 +2786,14 @@ + tree vectype; + int i; + slp_tree loads_node; ++ slp_void_p child; - if (vect_print_dump_info (REPORT_DETAILS)) -- fprintf (vect_dump, "transform load."); -+ fprintf (vect_dump, "transform load. ncopies = %d", ncopies); + if (!node) + return false; - /** Transform. **/ +- vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance, +- vectorization_factor); +- vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance, +- vectorization_factor); ++ FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child) ++ vect_schedule_slp_instance ((slp_tree) child, instance, ++ vectorization_factor); - if (strided_load) - { - first_stmt = DR_GROUP_FIRST_DR (stmt_info); -+ if (slp -+ && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance) -+ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)) -+ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); -+ - /* Check if the chain of loads is already vectorized. */ - if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) - { -@@ -3990,8 +4376,6 @@ - } - else - vec_num = group_size; -- -- dr_chain = VEC_alloc (tree, heap, vec_num); - } + stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); + stmt_info = vinfo_for_stmt (stmt); +@@ -2507,8 +2840,11 @@ + /* Loads should be inserted before the first load. 
*/ + if (SLP_INSTANCE_FIRST_LOAD_STMT (instance) + && STMT_VINFO_STRIDED_ACCESS (stmt_info) +- && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))) ++ && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)) ++ && SLP_INSTANCE_LOAD_PERMUTATION (instance)) + si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance)); ++ else if (is_pattern_stmt_p (stmt_info)) ++ si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); else - { -@@ -4002,6 +4386,11 @@ + si = gsi_for_stmt (stmt); - alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); - gcc_assert (alignment_support_scheme); -+ /* Targets with load-lane instructions must not require explicit -+ realignment. */ -+ gcc_assert (!load_lanes_p -+ || alignment_support_scheme == dr_aligned -+ || alignment_support_scheme == dr_unaligned_supported); +--- a/src/gcc/tree-vect-stmts.c ++++ b/src/gcc/tree-vect-stmts.c +@@ -42,6 +42,82 @@ + #include "langhooks.h" - /* In case the vectorization factor (VF) is bigger than the number - of elements that we can fit in a vectype (nunits), we have to generate -@@ -4133,208 +4522,252 @@ - if (negative) - offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); -+ if (load_lanes_p) -+ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); -+ else -+ aggr_type = vectype; ++/* Return a variable of type ELEM_TYPE[NELEMS]. */ + - prev_stmt_info = NULL; - for (j = 0; j < ncopies; j++) - { - /* 1. Create the vector pointer update chain. 
*/ - if (j == 0) -- dataref_ptr = vect_create_data_ref_ptr (first_stmt, -+ dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, - at_loop, offset, - &dummy, &ptr_incr, false, - &inv_p); - else -- dataref_ptr = -- bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); -+ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, -+ TYPE_SIZE_UNIT (aggr_type)); ++static tree ++create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems) ++{ ++ return create_tmp_var (build_array_type_nelts (elem_type, nelems), ++ "vect_array"); ++} + -+ if (strided_load || slp_perm) -+ dr_chain = VEC_alloc (tree, heap, vec_num); - -- for (i = 0; i < vec_num; i++) -+ if (load_lanes_p) - { -- if (i > 0) -- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, -- NULL_TREE); -+ tree vec_array; ++/* ARRAY is an array of vectors created by create_vector_array. ++ Return an SSA_NAME for the vector in index N. The reference ++ is part of the vectorization of STMT and the vector is associated ++ with scalar destination SCALAR_DEST. */ ++ ++static tree ++read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest, ++ tree array, unsigned HOST_WIDE_INT n) ++{ ++ tree vect_type, vect, vect_name, array_ref; ++ gimple new_stmt; ++ ++ gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE); ++ vect_type = TREE_TYPE (TREE_TYPE (array)); ++ vect = vect_create_destination_var (scalar_dest, vect_type); ++ array_ref = build4 (ARRAY_REF, vect_type, array, ++ build_int_cst (size_type_node, n), ++ NULL_TREE, NULL_TREE); ++ ++ new_stmt = gimple_build_assign (vect, array_ref); ++ vect_name = make_ssa_name (vect, new_stmt); ++ gimple_assign_set_lhs (new_stmt, vect_name); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++ ++ return vect_name; ++} ++ ++/* ARRAY is an array of vectors created by create_vector_array. ++ Emit code to store SSA_NAME VECT in index N of the array. 
++ The store is part of the vectorization of STMT. */ ++ ++static void ++write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect, ++ tree array, unsigned HOST_WIDE_INT n) ++{ ++ tree array_ref; ++ gimple new_stmt; ++ ++ array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array, ++ build_int_cst (size_type_node, n), ++ NULL_TREE, NULL_TREE); ++ ++ new_stmt = gimple_build_assign (array_ref, vect); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++} ++ ++/* PTR is a pointer to an array of type TYPE. Return a representation ++ of *PTR. The memory reference replaces those in FIRST_DR ++ (and its group). */ ++ ++static tree ++create_array_ref (tree type, tree ptr, struct data_reference *first_dr) ++{ ++ struct ptr_info_def *pi; ++ tree mem_ref, alias_ptr_type; ++ ++ alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr)); ++ mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0)); ++ /* Arrays have the same alignment as their type. */ ++ pi = get_ptr_info (ptr); ++ pi->align = TYPE_ALIGN_UNIT (type); ++ pi->misalign = 0; ++ return mem_ref; ++} ++ + /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ -- /* 2. Create the vector-load in the loop. 
*/ -- switch (alignment_support_scheme) -- { -- case dr_aligned: -- case dr_unaligned_supported: -- { -- struct ptr_info_def *pi; -- data_ref -- = build2 (MEM_REF, vectype, dataref_ptr, -- build_int_cst (reference_alias_ptr_type -- (DR_REF (first_dr)), 0)); -- pi = get_ptr_info (dataref_ptr); -- pi->align = TYPE_ALIGN_UNIT (vectype); -- if (alignment_support_scheme == dr_aligned) -- { -- gcc_assert (aligned_access_p (first_dr)); -- pi->misalign = 0; -- } -- else if (DR_MISALIGNMENT (first_dr) == -1) -- { -- TREE_TYPE (data_ref) -- = build_aligned_type (TREE_TYPE (data_ref), -- TYPE_ALIGN (TREE_TYPE (vectype))); -- pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); -- pi->misalign = 0; -- } -- else -- { -- TREE_TYPE (data_ref) -- = build_aligned_type (TREE_TYPE (data_ref), -- TYPE_ALIGN (TREE_TYPE (vectype))); -- pi->misalign = DR_MISALIGNMENT (first_dr); -- } -- break; -- } -- case dr_explicit_realign: -- { -- tree ptr, bump; -- tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); -+ vec_array = create_vector_array (vectype, vec_num); + /* Function vect_mark_relevant. 
+@@ -50,33 +126,72 @@ -- if (compute_in_loop) -- msq = vect_setup_realignment (first_stmt, gsi, -- &realignment_token, -- dr_explicit_realign, -- dataref_ptr, NULL); -- -- new_stmt = gimple_build_assign_with_ops -- (BIT_AND_EXPR, NULL_TREE, dataref_ptr, -- build_int_cst -- (TREE_TYPE (dataref_ptr), -- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); -- ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); -- gimple_assign_set_lhs (new_stmt, ptr); -- vect_finish_stmt_generation (stmt, new_stmt, gsi); -- data_ref -- = build2 (MEM_REF, vectype, ptr, -- build_int_cst (reference_alias_ptr_type -- (DR_REF (first_dr)), 0)); -- vec_dest = vect_create_destination_var (scalar_dest, vectype); -- new_stmt = gimple_build_assign (vec_dest, data_ref); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_assign_set_lhs (new_stmt, new_temp); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- vect_finish_stmt_generation (stmt, new_stmt, gsi); -- msq = new_temp; -- -- bump = size_binop (MULT_EXPR, vs_minus_1, -- TYPE_SIZE_UNIT (scalar_type)); -- ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); -- new_stmt = gimple_build_assign_with_ops -- (BIT_AND_EXPR, NULL_TREE, ptr, -- build_int_cst -- (TREE_TYPE (ptr), -- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); -- ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); -- gimple_assign_set_lhs (new_stmt, ptr); -- vect_finish_stmt_generation (stmt, new_stmt, gsi); -- data_ref -- = build2 (MEM_REF, vectype, ptr, -- build_int_cst (reference_alias_ptr_type -- (DR_REF (first_dr)), 0)); -- break; -- } -- case dr_explicit_realign_optimized: -- new_stmt = gimple_build_assign_with_ops -- (BIT_AND_EXPR, NULL_TREE, dataref_ptr, -- build_int_cst -- (TREE_TYPE (dataref_ptr), -- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); -- new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); -- gimple_assign_set_lhs (new_stmt, new_temp); -- vect_finish_stmt_generation (stmt, 
new_stmt, gsi); -- data_ref -- = build2 (MEM_REF, vectype, new_temp, -- build_int_cst (reference_alias_ptr_type -- (DR_REF (first_dr)), 0)); -- break; -- default: -- gcc_unreachable (); -- } -- vec_dest = vect_create_destination_var (scalar_dest, vectype); -- new_stmt = gimple_build_assign (vec_dest, data_ref); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_assign_set_lhs (new_stmt, new_temp); -+ /* Emit: -+ VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ -+ data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); -+ new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); -+ gimple_call_set_lhs (new_stmt, vec_array); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - mark_symbols_for_renaming (new_stmt); + static void + vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, +- enum vect_relevant relevant, bool live_p) ++ enum vect_relevant relevant, bool live_p, ++ bool used_in_pattern) + { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info); + bool save_live_p = STMT_VINFO_LIVE_P (stmt_info); ++ gimple pattern_stmt; -- /* 3. Handle explicit realignment if necessary/supported. Create in -- loop: vec_dest = realign_load (msq, lsq, realignment_token) */ -- if (alignment_support_scheme == dr_explicit_realign_optimized -- || alignment_support_scheme == dr_explicit_realign) -+ /* Extract each vector into an SSA_NAME. */ -+ for (i = 0; i < vec_num; i++) -+ { -+ new_temp = read_vector_array (stmt, gsi, scalar_dest, -+ vec_array, i); -+ VEC_quick_push (tree, dr_chain, new_temp); -+ } -+ -+ /* Record the mapping between SSA_NAMEs and statements. 
*/ -+ vect_record_strided_load_vectors (stmt, dr_chain); -+ } -+ else -+ { -+ for (i = 0; i < vec_num; i++) - { -- tree tmp; -+ if (i > 0) -+ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, -+ stmt, NULL_TREE); + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p); -- lsq = gimple_assign_lhs (new_stmt); -- if (!realignment_token) -- realignment_token = dataref_ptr; -+ /* 2. Create the vector-load in the loop. */ -+ switch (alignment_support_scheme) -+ { -+ case dr_aligned: -+ case dr_unaligned_supported: -+ { -+ struct ptr_info_def *pi; -+ data_ref -+ = build2 (MEM_REF, vectype, dataref_ptr, -+ build_int_cst (reference_alias_ptr_type -+ (DR_REF (first_dr)), 0)); -+ pi = get_ptr_info (dataref_ptr); -+ pi->align = TYPE_ALIGN_UNIT (vectype); -+ if (alignment_support_scheme == dr_aligned) -+ { -+ gcc_assert (aligned_access_p (first_dr)); -+ pi->misalign = 0; -+ } -+ else if (DR_MISALIGNMENT (first_dr) == -1) -+ { -+ TREE_TYPE (data_ref) -+ = build_aligned_type (TREE_TYPE (data_ref), -+ TYPE_ALIGN (elem_type)); -+ pi->align = TYPE_ALIGN_UNIT (elem_type); -+ pi->misalign = 0; -+ } -+ else -+ { -+ TREE_TYPE (data_ref) -+ = build_aligned_type (TREE_TYPE (data_ref), -+ TYPE_ALIGN (elem_type)); -+ pi->misalign = DR_MISALIGNMENT (first_dr); -+ } -+ break; -+ } -+ case dr_explicit_realign: -+ { -+ tree ptr, bump; -+ tree vs_minus_1 -+ = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); ++ /* If this stmt is an original stmt in a pattern, we might need to mark its ++ related pattern stmt instead of the original stmt. However, such stmts ++ may have their own uses that are not in any pattern, in such cases the ++ stmt itself should be marked. 
*/ + if (STMT_VINFO_IN_PATTERN_P (stmt_info)) + { +- gimple pattern_stmt; ++ bool found = false; ++ if (!used_in_pattern) ++ { ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ gimple use_stmt; ++ tree lhs; + -+ if (compute_in_loop) -+ msq = vect_setup_realignment (first_stmt, gsi, -+ &realignment_token, -+ dr_explicit_realign, -+ dataref_ptr, NULL); ++ if (is_gimple_assign (stmt)) ++ lhs = gimple_assign_lhs (stmt); ++ else ++ lhs = gimple_call_lhs (stmt); + -+ new_stmt = gimple_build_assign_with_ops -+ (BIT_AND_EXPR, NULL_TREE, dataref_ptr, -+ build_int_cst -+ (TREE_TYPE (dataref_ptr), -+ -(HOST_WIDE_INT) -+ TYPE_ALIGN_UNIT (vectype))); -+ ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); -+ gimple_assign_set_lhs (new_stmt, ptr); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ data_ref -+ = build2 (MEM_REF, vectype, ptr, -+ build_int_cst (reference_alias_ptr_type -+ (DR_REF (first_dr)), 0)); -+ vec_dest = vect_create_destination_var (scalar_dest, -+ vectype); -+ new_stmt = gimple_build_assign (vec_dest, data_ref); -+ new_temp = make_ssa_name (vec_dest, new_stmt); -+ gimple_assign_set_lhs (new_stmt, new_temp); -+ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -+ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ msq = new_temp; ++ /* This use is out of pattern use, if LHS has other uses that are ++ pattern uses, we should mark the stmt itself, and not the pattern ++ stmt. 
*/ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ use_stmt = USE_STMT (use_p); + -+ bump = size_binop (MULT_EXPR, vs_minus_1, -+ TYPE_SIZE_UNIT (scalar_type)); -+ ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); -+ new_stmt = gimple_build_assign_with_ops -+ (BIT_AND_EXPR, NULL_TREE, ptr, -+ build_int_cst -+ (TREE_TYPE (ptr), -+ -(HOST_WIDE_INT) -+ TYPE_ALIGN_UNIT (vectype))); -+ ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); -+ gimple_assign_set_lhs (new_stmt, ptr); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ data_ref -+ = build2 (MEM_REF, vectype, ptr, -+ build_int_cst (reference_alias_ptr_type -+ (DR_REF (first_dr)), 0)); -+ break; -+ } -+ case dr_explicit_realign_optimized: -+ new_stmt = gimple_build_assign_with_ops -+ (BIT_AND_EXPR, NULL_TREE, dataref_ptr, -+ build_int_cst -+ (TREE_TYPE (dataref_ptr), -+ -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); -+ new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), -+ new_stmt); -+ gimple_assign_set_lhs (new_stmt, new_temp); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ data_ref -+ = build2 (MEM_REF, vectype, new_temp, -+ build_int_cst (reference_alias_ptr_type -+ (DR_REF (first_dr)), 0)); -+ break; -+ default: -+ gcc_unreachable (); -+ } - vec_dest = vect_create_destination_var (scalar_dest, vectype); -- tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, -- realignment_token); -- new_stmt = gimple_build_assign (vec_dest, tmp); -+ new_stmt = gimple_build_assign (vec_dest, data_ref); - new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); - vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ mark_symbols_for_renaming (new_stmt); - -- if (alignment_support_scheme == dr_explicit_realign_optimized) -+ /* 3. Handle explicit realignment if necessary/supported. 
-+ Create in loop: -+ vec_dest = realign_load (msq, lsq, realignment_token) */ -+ if (alignment_support_scheme == dr_explicit_realign_optimized -+ || alignment_support_scheme == dr_explicit_realign) - { -- gcc_assert (phi); -- if (i == vec_num - 1 && j == ncopies - 1) -- add_phi_arg (phi, lsq, loop_latch_edge (containing_loop), -- UNKNOWN_LOCATION); -- msq = lsq; -- } -- } -+ tree tmp; ++ if (vinfo_for_stmt (use_stmt) ++ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) ++ { ++ found = true; ++ break; ++ } ++ } ++ } -- /* 4. Handle invariant-load. */ -- if (inv_p && !bb_vinfo) -- { -- gcc_assert (!strided_load); -- gcc_assert (nested_in_vect_loop_p (loop, stmt)); -- if (j == 0) -- { -- int k; -- tree t = NULL_TREE; -- tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); +- /* This is the last stmt in a sequence that was detected as a +- pattern that can potentially be vectorized. Don't mark the stmt +- as relevant/live because it's not going to be vectorized. +- Instead mark the pattern-stmt that replaces it. */ - -- /* CHECKME: bitpos depends on endianess? */ -- bitpos = bitsize_zero_node; -- vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, -- bitsize, bitpos); -- vec_dest = -- vect_create_destination_var (scalar_dest, NULL_TREE); -- new_stmt = gimple_build_assign (vec_dest, vec_inv); -- new_temp = make_ssa_name (vec_dest, new_stmt); -+ lsq = gimple_assign_lhs (new_stmt); -+ if (!realignment_token) -+ realignment_token = dataref_ptr; -+ vec_dest = vect_create_destination_var (scalar_dest, vectype); -+ tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, -+ realignment_token); -+ new_stmt = gimple_build_assign (vec_dest, tmp); -+ new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); - vect_finish_stmt_generation (stmt, new_stmt, gsi); +- pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); +- +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "last stmt in pattern. 
don't mark relevant/live."); +- stmt_info = vinfo_for_stmt (pattern_stmt); +- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); +- save_relevant = STMT_VINFO_RELEVANT (stmt_info); +- save_live_p = STMT_VINFO_LIVE_P (stmt_info); +- stmt = pattern_stmt; ++ if (!found) ++ { ++ /* This is the last stmt in a sequence that was detected as a ++ pattern that can potentially be vectorized. Don't mark the stmt ++ as relevant/live because it's not going to be vectorized. ++ Instead mark the pattern-stmt that replaces it. */ ++ ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); ++ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "last stmt in pattern. don't mark" ++ " relevant/live."); ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt); ++ save_relevant = STMT_VINFO_RELEVANT (stmt_info); ++ save_live_p = STMT_VINFO_LIVE_P (stmt_info); ++ stmt = pattern_stmt; ++ } + } -- for (k = nunits - 1; k >= 0; --k) -- t = tree_cons (NULL_TREE, new_temp, t); -- /* FIXME: use build_constructor directly. */ -- vec_inv = build_constructor_from_list (vectype, t); -- new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); -- new_stmt = SSA_NAME_DEF_STMT (new_temp); -+ if (alignment_support_scheme == dr_explicit_realign_optimized) -+ { -+ gcc_assert (phi); -+ if (i == vec_num - 1 && j == ncopies - 1) -+ add_phi_arg (phi, lsq, -+ loop_latch_edge (containing_loop), -+ UNKNOWN_LOCATION); -+ msq = lsq; -+ } - } -- else -- gcc_unreachable (); /* FORNOW. 
*/ -- } + STMT_VINFO_LIVE_P (stmt_info) |= live_p; +@@ -361,7 +476,8 @@ + } + } -- if (negative) +- vect_mark_relevant (worklist, def_stmt, relevant, live_p); ++ vect_mark_relevant (worklist, def_stmt, relevant, live_p, ++ is_pattern_stmt_p (stmt_vinfo)); + return true; + } + +@@ -418,7 +534,7 @@ + } + + if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p)) +- vect_mark_relevant (&worklist, phi, relevant, live_p); ++ vect_mark_relevant (&worklist, phi, relevant, live_p, false); + } + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { +@@ -430,7 +546,7 @@ + } + + if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p)) +- vect_mark_relevant (&worklist, stmt, relevant, live_p); ++ vect_mark_relevant (&worklist, stmt, relevant, live_p, false); + } + } + +@@ -529,15 +645,109 @@ + break; + } + +- FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) +- { +- tree op = USE_FROM_PTR (use_p); +- if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist)) - { -- new_temp = reverse_vec_elements (new_temp, stmt, gsi); -- new_stmt = SSA_NAME_DEF_STMT (new_temp); +- VEC_free (gimple, heap, worklist); +- return false; - } -+ /* 4. Handle invariant-load. */ -+ if (inv_p && !bb_vinfo) -+ { -+ gcc_assert (!strided_load); -+ gcc_assert (nested_in_vect_loop_p (loop, stmt)); -+ if (j == 0) -+ { -+ int k; -+ tree t = NULL_TREE; -+ tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); +- } ++ if (is_pattern_stmt_p (vinfo_for_stmt (stmt))) ++ { ++ /* Pattern statements are not inserted into the code, so ++ FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we ++ have to scan the RHS or function arguments instead. 
*/ ++ if (is_gimple_assign (stmt)) ++ { ++ tree rhs = gimple_assign_rhs1 (stmt); ++ unsigned int op_num; ++ tree op; ++ enum tree_code rhs_code; ++ switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) ++ { ++ case GIMPLE_SINGLE_RHS: ++ op = gimple_assign_rhs1 (stmt); ++ rhs_code = gimple_assign_rhs_code (stmt); ++ i = 0; ++ if (rhs_code == COND_EXPR ++ && COMPARISON_CLASS_P (TREE_OPERAND (op, 0))) ++ { ++ op = TREE_OPERAND (op, 0); ++ if (!process_use (stmt, TREE_OPERAND (op, 0), ++ loop_vinfo, ++ live_p, relevant, &worklist) ++ || !process_use (stmt, TREE_OPERAND (op, 1), ++ loop_vinfo, ++ live_p, relevant, &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ i = 1; ++ } ++ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)); ++ for (i; i < op_num; i++) ++ { ++ op = TREE_OPERAND (rhs, i); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } ++ break; ++ ++ case GIMPLE_BINARY_RHS: ++ op = gimple_assign_rhs1 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ op = gimple_assign_rhs2 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ break; + -+ /* CHECKME: bitpos depends on endianess? 
*/ -+ bitpos = bitsize_zero_node; -+ vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, -+ bitsize, bitpos); -+ vec_dest = vect_create_destination_var (scalar_dest, -+ NULL_TREE); -+ new_stmt = gimple_build_assign (vec_dest, vec_inv); -+ new_temp = make_ssa_name (vec_dest, new_stmt); -+ gimple_assign_set_lhs (new_stmt, new_temp); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ case GIMPLE_UNARY_RHS: ++ op = gimple_assign_rhs1 (stmt); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } + -+ for (k = nunits - 1; k >= 0; --k) -+ t = tree_cons (NULL_TREE, new_temp, t); -+ /* FIXME: use build_constructor directly. */ -+ vec_inv = build_constructor_from_list (vectype, t); -+ new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); -+ new_stmt = SSA_NAME_DEF_STMT (new_temp); -+ } -+ else -+ gcc_unreachable (); /* FORNOW. */ -+ } ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ else if (is_gimple_call (stmt)) ++ { ++ for (i = 0; i < gimple_call_num_args (stmt); i++) ++ { ++ tree arg = gimple_call_arg (stmt, i); ++ if (!process_use (stmt, arg, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } ++ } ++ } ++ else ++ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) ++ { ++ tree op = USE_FROM_PTR (use_p); ++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ++ &worklist)) ++ { ++ VEC_free (gimple, heap, worklist); ++ return false; ++ } ++ } + } /* while worklist */ -- /* Collect vector loads and later create their permutation in -- vect_transform_strided_load (). */ -- if (strided_load || slp_perm) -- VEC_quick_push (tree, dr_chain, new_temp); -+ if (negative) -+ { -+ new_temp = reverse_vec_elements (new_temp, stmt, gsi); -+ new_stmt = SSA_NAME_DEF_STMT (new_temp); -+ } + VEC_free (gimple, heap, worklist); +@@ -688,7 +898,8 @@ -- /* Store vector loads in the corresponding SLP_NODE. 
*/ -- if (slp && !slp_perm) -- VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); -+ /* Collect vector loads and later create their permutation in -+ vect_transform_strided_load (). */ -+ if (strided_load || slp_perm) -+ VEC_quick_push (tree, dr_chain, new_temp); -+ -+ /* Store vector loads in the corresponding SLP_NODE. */ -+ if (slp && !slp_perm) -+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), -+ new_stmt); -+ } - } + void + vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, +- enum vect_def_type dt, slp_tree slp_node) ++ bool store_lanes_p, enum vect_def_type dt, ++ slp_tree slp_node) + { + int group_size; + unsigned int inside_cost = 0, outside_cost = 0; +@@ -725,9 +936,11 @@ + first_dr = STMT_VINFO_DATA_REF (stmt_info); + } - if (slp && !slp_perm) -@@ -4353,12 +4786,9 @@ - { - if (strided_load) - { -- if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi)) -- return false; -- -+ if (!load_lanes_p) -+ vect_transform_strided_load (stmt, dr_chain, group_size, gsi); - *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); -- VEC_free (tree, heap, dr_chain); -- dr_chain = VEC_alloc (tree, heap, group_size); - } - else - { -@@ -4369,11 +4799,10 @@ - prev_stmt_info = vinfo_for_stmt (new_stmt); - } - } -+ if (dr_chain) -+ VEC_free (tree, heap, dr_chain); +- /* Is this an access in a group of stores, which provide strided access? +- If so, add in the cost of the permutes. */ +- if (group_size > 1) ++ /* We assume that the cost of a single store-lanes instruction is ++ equivalent to the cost of GROUP_SIZE separate stores. If a strided ++ access is instead being provided by a permute-and-store operation, ++ include the cost of the permutes. */ ++ if (!store_lanes_p && group_size > 1) + { + /* Uses a high and low interleave operation for each needed permute. 
*/ + inside_cost = ncopies * exact_log2(group_size) * group_size +@@ -789,6 +1002,16 @@ + break; + } + ++ case dr_unaligned_unsupported: ++ { ++ *inside_cost = VECT_MAX_COST; ++ ++ if (vect_print_dump_info (REPORT_COST)) ++ fprintf (vect_dump, "vect_model_store_cost: unsupported access."); ++ ++ break; ++ } ++ + default: + gcc_unreachable (); } +@@ -803,8 +1026,8 @@ + access scheme chosen. */ -- if (dr_chain) -- VEC_free (tree, heap, dr_chain); + void +-vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) - - return true; - } - -@@ -4387,7 +4816,7 @@ - condition operands are supportable using vec_is_simple_use. */ - - static bool --vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) -+vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) ++vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p, ++ slp_tree slp_node) { - tree lhs, rhs; - tree def; -@@ -4402,7 +4831,7 @@ - if (TREE_CODE (lhs) == SSA_NAME) - { - gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); -- if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def, -+ if (!vect_is_simple_use (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def, - &dt)) - return false; + int group_size; + gimple first_stmt; +@@ -829,9 +1052,11 @@ + first_dr = dr; } -@@ -4413,7 +4842,7 @@ - if (TREE_CODE (rhs) == SSA_NAME) + +- /* Is this an access in a group of loads providing strided access? +- If so, add in the cost of the permutes. */ +- if (group_size > 1) ++ /* We assume that the cost of a single load-lanes instruction is ++ equivalent to the cost of GROUP_SIZE separate loads. If a strided ++ access is instead being provided by a load-and-permute operation, ++ include the cost of the permutes. 
*/ ++ if (!load_lanes_p && group_size > 1) { - gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); -- if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def, -+ if (!vect_is_simple_use (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def, - &dt)) - return false; + /* Uses an even and odd extract operations for each needed permute. */ + inside_cost = ncopies * exact_log2(group_size) * group_size +@@ -938,6 +1163,16 @@ + break; + } + ++ case dr_unaligned_unsupported: ++ { ++ *inside_cost = VECT_MAX_COST; ++ ++ if (vect_print_dump_info (REPORT_COST)) ++ fprintf (vect_dump, "vect_model_load_cost: unsupported access."); ++ ++ break; ++ } ++ + default: + gcc_unreachable (); } -@@ -4439,7 +4868,8 @@ +@@ -1116,7 +1351,14 @@ - bool - vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, -- gimple *vec_stmt, tree reduc_def, int reduc_index) -+ gimple *vec_stmt, tree reduc_def, int reduc_index, -+ slp_tree slp_node) + /* Get the def from the vectorized stmt. */ + def_stmt_info = vinfo_for_stmt (def_stmt); ++ + vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); ++ /* Get vectorized pattern statement. */ ++ if (!vec_stmt ++ && STMT_VINFO_IN_PATTERN_P (def_stmt_info) ++ && !STMT_VINFO_RELEVANT (def_stmt_info)) ++ vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt ( ++ STMT_VINFO_RELATED_STMT (def_stmt_info))); + gcc_assert (vec_stmt); + if (gimple_code (vec_stmt) == GIMPLE_PHI) + vec_oprnd = PHI_RESULT (vec_stmt); +@@ -1265,16 +1507,35 @@ + } + + +-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not +- NULL. */ ++/* Get vectorized definitions for OP0 and OP1. ++ REDUC_INDEX is the index of reduction operand in case of reduction, ++ and -1 otherwise. 
*/ + +-static void ++void + vect_get_vec_defs (tree op0, tree op1, gimple stmt, +- VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1, +- slp_tree slp_node) ++ VEC (tree, heap) **vec_oprnds0, ++ VEC (tree, heap) **vec_oprnds1, ++ slp_tree slp_node, int reduc_index) { - tree scalar_dest = NULL_TREE; - tree vec_dest = NULL_TREE; -@@ -4456,19 +4886,24 @@ - tree def; - enum vect_def_type dt, dts[4]; - int nunits = TYPE_VECTOR_SUBPARTS (vectype); -- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; -+ int ncopies; - enum tree_code code; - stmt_vec_info prev_stmt_info = NULL; -- int j; -+ int i, j; -+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); -+ VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; -+ VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL; + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1); ++ { ++ int nops = (op1 == NULL_TREE) ? 1 : 2; ++ VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops); ++ VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops); ++ ++ VEC_quick_push (tree, ops, op0); ++ if (op1) ++ VEC_quick_push (tree, ops, op1); ++ ++ vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index); ++ ++ *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0); ++ if (op1) ++ *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1); ++ ++ VEC_free (tree, heap, ops); ++ VEC_free (slp_void_p, heap, vec_defs); ++ } + else + { + tree vec_oprnd; +@@ -1372,6 +1633,7 @@ + VEC(tree, heap) *vargs = NULL; + enum { NARROW, NONE, WIDEN } modifier; + size_t i, nargs; ++ tree lhs; -- /* FORNOW: unsupported in basic block SLP. */ -- gcc_assert (loop_vinfo); -+ if (slp_node || PURE_SLP_STMT (stmt_info)) -+ ncopies = 1; -+ else -+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + /* FORNOW: unsupported in basic block SLP. */ + gcc_assert (loop_vinfo); +@@ -1509,7 +1771,7 @@ + /** Transform. 
**/ - gcc_assert (ncopies >= 1); -- if (reduc_index && ncopies > 1) -+ if (reduc_index && (ncopies > 1 || STMT_SLP_TYPE (stmt_info))) - return false; /* FORNOW */ + if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "transform operation."); ++ fprintf (vect_dump, "transform call."); -- if (!STMT_VINFO_RELEVANT_P (stmt_info)) -+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) - return false; + /* Handle def. */ + scalar_dest = gimple_call_lhs (stmt); +@@ -1628,8 +1890,11 @@ + rhs of the statement with something harmless. */ - if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def -@@ -4476,10 +4911,6 @@ - && reduc_def)) - return false; + type = TREE_TYPE (scalar_dest); +- new_stmt = gimple_build_assign (gimple_call_lhs (stmt), +- build_zero_cst (type)); ++ if (is_pattern_stmt_p (stmt_info)) ++ lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)); ++ else ++ lhs = gimple_call_lhs (stmt); ++ new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); + set_vinfo_for_stmt (new_stmt, stmt_info); + /* For pattern statements make the related statement to point to + NEW_STMT in order to be able to retrieve the original statement +@@ -1858,7 +2123,8 @@ + for (j = 0; j < ncopies; j++) + { + if (j == 0) +- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); ++ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node, ++ -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); -- /* FORNOW: SLP not supported. */ -- if (STMT_SLP_TYPE (stmt_info)) -- return false; -- - /* FORNOW: not yet supported. */ - if (STMT_VINFO_LIVE_P (stmt_info)) +@@ -2063,7 +2329,7 @@ { -@@ -4503,7 +4934,7 @@ - then_clause = TREE_OPERAND (op, 1); - else_clause = TREE_OPERAND (op, 2); + /* Handle uses. 
*/ + if (j == 0) +- vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); ++ vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); -- if (!vect_is_simple_cond (cond_expr, loop_vinfo)) -+ if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo)) - return false; +@@ -2096,6 +2362,42 @@ + } - /* We do not handle two different vector types for the condition -@@ -4515,7 +4946,7 @@ - if (TREE_CODE (then_clause) == SSA_NAME) - { - gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause); -- if (!vect_is_simple_use (then_clause, loop_vinfo, NULL, -+ if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo, - &then_def_stmt, &def, &dt)) - return false; - } -@@ -4527,7 +4958,7 @@ - if (TREE_CODE (else_clause) == SSA_NAME) - { - gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause); -- if (!vect_is_simple_use (else_clause, loop_vinfo, NULL, -+ if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo, - &else_def_stmt, &def, &dt)) - return false; - } -@@ -4545,7 +4976,15 @@ - return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode); - } -- /* Transform */ -+ /* Transform. */ ++/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE ++ either as shift by a scalar or by a vector. */ + -+ if (!slp_node) -+ { -+ vec_oprnds0 = VEC_alloc (tree, heap, 1); -+ vec_oprnds1 = VEC_alloc (tree, heap, 1); -+ vec_oprnds2 = VEC_alloc (tree, heap, 1); -+ vec_oprnds3 = VEC_alloc (tree, heap, 1); -+ } - - /* Handle def. */ - scalar_dest = gimple_assign_lhs (stmt); -@@ -4554,67 +4993,118 @@ - /* Handle cond expr. 
*/ - for (j = 0; j < ncopies; j++) - { -- gimple new_stmt; -+ gimple new_stmt = NULL; - if (j == 0) - { -- gimple gtemp; -- vec_cond_lhs = -+ if (slp_node) -+ { -+ VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4); -+ VEC (slp_void_p, heap) *vec_defs; ++bool ++vect_supportable_shift (enum tree_code code, tree scalar_type) ++{ + -+ vec_defs = VEC_alloc (slp_void_p, heap, 4); -+ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0)); -+ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1)); -+ VEC_safe_push (tree, heap, ops, then_clause); -+ VEC_safe_push (tree, heap, ops, else_clause); -+ vect_get_slp_defs (ops, slp_node, &vec_defs, -1); -+ vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); -+ vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); -+ vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); -+ vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ enum machine_mode vec_mode; ++ optab optab; ++ int icode; ++ tree vectype; + -+ VEC_free (tree, heap, ops); -+ VEC_free (slp_void_p, heap, vec_defs); -+ } -+ else -+ { -+ gimple gtemp; -+ vec_cond_lhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), - stmt, NULL); -- vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, -+ vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, - NULL, >emp, &def, &dts[0]); -- vec_cond_rhs = -- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), -- stmt, NULL); -- vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, -- NULL, >emp, &def, &dts[1]); -- if (reduc_index == 1) -- vec_then_clause = reduc_def; -- else -- { -- vec_then_clause = vect_get_vec_def_for_operand (then_clause, -- stmt, NULL); -- vect_is_simple_use (then_clause, loop_vinfo, -- NULL, >emp, &def, &dts[2]); -- } -- if (reduc_index == 2) -- vec_else_clause = reduc_def; -- else -- { -- vec_else_clause = vect_get_vec_def_for_operand (else_clause, ++ vectype = get_vectype_for_scalar_type (scalar_type); ++ if 
(!vectype) ++ return false; + -+ vec_cond_rhs = -+ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), -+ stmt, NULL); -+ vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, -+ NULL, >emp, &def, &dts[1]); -+ if (reduc_index == 1) -+ vec_then_clause = reduc_def; -+ else -+ { -+ vec_then_clause = vect_get_vec_def_for_operand (then_clause, -+ stmt, NULL); -+ vect_is_simple_use (then_clause, loop_vinfo, -+ NULL, >emp, &def, &dts[2]); -+ } -+ if (reduc_index == 2) -+ vec_else_clause = reduc_def; -+ else -+ { -+ vec_else_clause = vect_get_vec_def_for_operand (else_clause, - stmt, NULL); -- vect_is_simple_use (else_clause, loop_vinfo, -+ vect_is_simple_use (else_clause, loop_vinfo, - NULL, >emp, &def, &dts[3]); -+ } - } - } - else - { -- vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs); -- vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs); -+ vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], -+ VEC_pop (tree, vec_oprnds0)); -+ vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], -+ VEC_pop (tree, vec_oprnds1)); - vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], -- vec_then_clause); -+ VEC_pop (tree, vec_oprnds2)); - vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], -- vec_else_clause); -+ VEC_pop (tree, vec_oprnds3)); -+ } ++ optab = optab_for_tree_code (code, vectype, optab_scalar); ++ if (!optab ++ || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) ++ { ++ optab = optab_for_tree_code (code, vectype, optab_vector); ++ if (!optab ++ || (optab_handler (optab, TYPE_MODE (vectype)) ++ == CODE_FOR_nothing)) ++ return false; ++ } + -+ if (!slp_node) -+ { -+ VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs); -+ VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs); -+ VEC_quick_push (tree, vec_oprnds2, vec_then_clause); -+ VEC_quick_push (tree, vec_oprnds3, vec_else_clause); - } - - /* Arguments are ready. Create the new vector stmt. 
*/ -- vec_compare = build2 (TREE_CODE (cond_expr), vectype, -- vec_cond_lhs, vec_cond_rhs); -- vec_cond_expr = build3 (VEC_COND_EXPR, vectype, -- vec_compare, vec_then_clause, vec_else_clause); -+ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs) -+ { -+ vec_cond_rhs = VEC_index (tree, vec_oprnds1, i); -+ vec_then_clause = VEC_index (tree, vec_oprnds2, i); -+ vec_else_clause = VEC_index (tree, vec_oprnds3, i); ++ vec_mode = TYPE_MODE (vectype); ++ icode = (int) optab_handler (optab, vec_mode); ++ if (icode == CODE_FOR_nothing) ++ return false; + -+ vec_compare = build2 (TREE_CODE (cond_expr), vectype, -+ vec_cond_lhs, vec_cond_rhs); -+ vec_cond_expr = build3 (VEC_COND_EXPR, vectype, -+ vec_compare, vec_then_clause, vec_else_clause); - -- new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_assign_set_lhs (new_stmt, new_temp); -- vect_finish_stmt_generation (stmt, new_stmt, gsi); -- if (j == 0) -- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; -- else -- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; -+ new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); -+ new_temp = make_ssa_name (vec_dest, new_stmt); -+ gimple_assign_set_lhs (new_stmt, new_temp); -+ vect_finish_stmt_generation (stmt, new_stmt, gsi); -+ if (slp_node) -+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); -+ } - -- prev_stmt_info = vinfo_for_stmt (new_stmt); -+ if (slp_node) -+ continue; ++ return true; ++} + -+ if (j == 0) -+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; -+ else -+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + -+ prev_stmt_info = vinfo_for_stmt (new_stmt); - } + /* Function vectorizable_shift. -+ VEC_free (tree, heap, vec_oprnds0); -+ VEC_free (tree, heap, vec_oprnds1); -+ VEC_free (tree, heap, vec_oprnds2); -+ VEC_free (tree, heap, vec_oprnds3); -+ - return true; - } + Check if STMT performs a shift operation that can be vectorized. 
+@@ -2382,10 +2684,10 @@ + operand 1 should be of a vector type (the usual case). */ + if (vec_oprnd1) + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, +- slp_node); ++ slp_node, -1); + else + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, +- slp_node); ++ slp_node, -1); + } + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); +@@ -2693,10 +2995,10 @@ + { + if (op_type == binary_op || op_type == ternary_op) + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, +- slp_node); ++ slp_node, -1); + else + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, +- slp_node); ++ slp_node, -1); + if (op_type == ternary_op) + { + vec_oprnds2 = VEC_alloc (tree, heap, 1); +@@ -2887,11 +3189,9 @@ + VEC (tree, heap) *vec_oprnds0 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; + tree last_oprnd, intermediate_type; ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); -@@ -4629,6 +5119,7 @@ - enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); - bool ok; - tree scalar_type, vectype; -+ gimple pattern_stmt, pattern_def_stmt; +- /* FORNOW: not supported by basic block SLP vectorization. */ +- gcc_assert (loop_vinfo); +- +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; - if (vect_print_dump_info (REPORT_DETAILS)) + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) +@@ -2919,7 +3219,7 @@ + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && CONVERT_EXPR_CODE_P (code)))) + return false; +- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, ++ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[0], &vectype_in)) { -@@ -4650,16 +5141,70 @@ - - any LABEL_EXPRs in the loop - - computations that are used only for array indexing or loop control. - In basic blocks we only analyze statements that are a part of some SLP -- instance, therefore, all the statements are relevant. 
*/ -+ instance, therefore, all the statements are relevant. - -+ Pattern statement needs to be analyzed instead of the original statement -+ if the original statement is not relevant. Otherwise, we analyze both -+ statements. */ -+ -+ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); - if (!STMT_VINFO_RELEVANT_P (stmt_info) - && !STMT_VINFO_LIVE_P (stmt_info)) + if (vect_print_dump_info (REPORT_DETAILS)) +@@ -3010,7 +3310,8 @@ { -- if (vect_print_dump_info (REPORT_DETAILS)) -- fprintf (vect_dump, "irrelevant."); -+ if (STMT_VINFO_IN_PATTERN_P (stmt_info) -+ && pattern_stmt -+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) -+ { -+ /* Analyze PATTERN_STMT instead of the original stmt. */ -+ stmt = pattern_stmt; -+ stmt_info = vinfo_for_stmt (pattern_stmt); -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ { -+ fprintf (vect_dump, "==> examining pattern statement: "); -+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); -+ } -+ } -+ else -+ { -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ fprintf (vect_dump, "irrelevant."); + /* Handle uses. */ + if (slp_node) +- vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1); ++ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, ++ slp_node, -1); + else + { + VEC_free (tree, heap, vec_oprnds0); +@@ -3166,11 +3467,10 @@ + int multi_step_cvt = 0; + VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; + VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL; ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ unsigned int k; + +- /* FORNOW: not supported by basic block SLP vectorization. 
*/ +- gcc_assert (loop_vinfo); +- +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; -- return true; -+ return true; -+ } + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) +@@ -3185,7 +3485,8 @@ + + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) +- && code != WIDEN_MULT_EXPR) ++ && code != WIDEN_MULT_EXPR ++ && code != WIDEN_LSHIFT_EXPR) + return false; + + scalar_dest = gimple_assign_lhs (stmt); +@@ -3199,13 +3500,40 @@ + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)) + && CONVERT_EXPR_CODE_P (code)))) + return false; +- if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL, ++ if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt[0], &vectype_in)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "use not simple."); + return false; } -+ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) -+ && pattern_stmt -+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) -+ { -+ /* Analyze PATTERN_STMT too. */ -+ if (vect_print_dump_info (REPORT_DETAILS)) -+ { -+ fprintf (vect_dump, "==> examining pattern statement: "); -+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); -+ } -+ -+ if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) -+ return false; -+ } + -+ if (is_pattern_stmt_p (stmt_info) -+ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) -+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) -+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ op_type = TREE_CODE_LENGTH (code); ++ if (op_type == binary_op) + { -+ /* Analyze def stmt of STMT if it's a pattern stmt. 
*/ -+ if (vect_print_dump_info (REPORT_DETAILS)) ++ bool ok; ++ ++ op1 = gimple_assign_rhs2 (stmt); ++ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) + { -+ fprintf (vect_dump, "==> examining pattern def statement: "); -+ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM); -+ } ++ /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of ++ OP1. */ ++ if (CONSTANT_CLASS_P (op0)) ++ ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, ++ &def_stmt, &def, &dt[1], &vectype_in); ++ else ++ ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, ++ &def, &dt[1]); + -+ if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node)) -+ return false; -+ } ++ if (!ok) ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "use not simple."); ++ return false; ++ } ++ } ++ } + + /* If op0 is an external or constant def use a vector type with + the same size as the output vector type. */ + if (!vectype_in) +@@ -3238,18 +3566,6 @@ - switch (STMT_VINFO_DEF_TYPE (stmt_info)) - { -@@ -4733,15 +5278,18 @@ - || vectorizable_call (stmt, NULL, NULL) - || vectorizable_store (stmt, NULL, NULL, NULL) - || vectorizable_reduction (stmt, NULL, NULL, NULL) -- || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); -+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); - else - { - if (bb_vinfo) -- ok = (vectorizable_shift (stmt, NULL, NULL, node) -+ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node) -+ || vectorizable_type_demotion (stmt, NULL, NULL, node) -+ || vectorizable_shift (stmt, NULL, NULL, node) - || vectorizable_operation (stmt, NULL, NULL, node) - || vectorizable_assignment (stmt, NULL, NULL, node) - || vectorizable_load (stmt, NULL, NULL, node, NULL) -- || vectorizable_store (stmt, NULL, NULL, node)); -+ || vectorizable_store (stmt, NULL, NULL, node) -+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); - } - - if (!ok) -@@ -4777,27 +5325,6 @@ - return false; - } + gcc_assert (ncopies >= 1); -- 
if (!PURE_SLP_STMT (stmt_info)) +- op_type = TREE_CODE_LENGTH (code); +- if (op_type == binary_op) - { -- /* Groups of strided accesses whose size is not a power of 2 are not -- vectorizable yet using loop-vectorization. Therefore, if this stmt -- feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and -- loop-based vectorized), the loop cannot be vectorized. */ -- if (STMT_VINFO_STRIDED_ACCESS (stmt_info) -- && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( -- DR_GROUP_FIRST_DR (stmt_info)))) == -1) +- op1 = gimple_assign_rhs2 (stmt); +- if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1])) - { -- if (vect_print_dump_info (REPORT_DETAILS)) -- { -- fprintf (vect_dump, "not vectorized: the size of group " -- "of strided accesses is not a power of 2"); -- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); -- } -- +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "use not simple."); - return false; - } - } - - return true; - } + /* Supportable by target? */ + if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in, + &decl1, &decl2, &code1, &code2, +@@ -3275,6 +3591,14 @@ + fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", + ncopies); -@@ -4814,7 +5341,6 @@ - bool is_store = false; - gimple vec_stmt = NULL; ++ if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR) ++ { ++ if (CONSTANT_CLASS_P (op0)) ++ op0 = fold_convert (TREE_TYPE (op1), op0); ++ else if (CONSTANT_CLASS_P (op1)) ++ op1 = fold_convert (TREE_TYPE (op0), op1); ++ } ++ + /* Handle def. */ + /* In case of multi-step promotion, we first generate promotion operations + to the intermediate types, and then from that types to the final one. 
+@@ -3308,6 +3632,8 @@ + if (op_type == binary_op) + vec_oprnds1 = VEC_alloc (tree, heap, 1); + } ++ else if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size); + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -3321,15 +3647,33 @@ + if (j == 0) + { + if (slp_node) +- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, +- &vec_oprnds1, -1); +- else ++ { ++ if (code == WIDEN_LSHIFT_EXPR) ++ { ++ vec_oprnd1 = op1; ++ /* Store vec_oprnd1 for every vector stmt to be created ++ for SLP_NODE. We check during the analysis that all ++ the shift arguments are the same. */ ++ for (k = 0; k < slp_node->vec_stmts_size - 1; k++) ++ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); ++ ++ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, ++ slp_node, -1); ++ } ++ else ++ vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, ++ &vec_oprnds1, slp_node, -1); ++ } ++ else + { + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); + VEC_quick_push (tree, vec_oprnds0, vec_oprnd0); + if (op_type == binary_op) + { +- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); ++ if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnd1 = op1; ++ else ++ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + } + } +@@ -3340,7 +3684,10 @@ + VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0); + if (op_type == binary_op) + { +- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); ++ if (code == WIDEN_LSHIFT_EXPR) ++ vec_oprnd1 = op1; ++ else ++ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); + VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1); + } + } +@@ -3385,6 +3732,7 @@ stmt_vec_info stmt_info = vinfo_for_stmt (stmt); -- gimple orig_stmt_in_pattern; - bool done; + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; + tree vectype = 
STMT_VINFO_VECTYPE (stmt_info); ++ tree elem_type; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = NULL; + enum machine_mode vec_mode; +@@ -3400,6 +3748,7 @@ + int j; + gimple next_stmt, first_stmt = NULL; + bool strided_store = false; ++ bool store_lanes_p = false; + unsigned int group_size, i; + VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; + bool inv_p; +@@ -3407,6 +3756,7 @@ + bool slp = (slp_node != NULL); + unsigned int vec_num; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ tree aggr_type; - switch (STMT_VINFO_TYPE (stmt_info)) -@@ -4879,8 +5405,7 @@ - break; + if (loop_vinfo) + loop = LOOP_VINFO_LOOP (loop_vinfo); +@@ -3460,7 +3810,8 @@ - case condition_vec_info_type: -- gcc_assert (!slp_node); -- done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0); -+ done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node); - gcc_assert (done); - break; + /* The scalar rhs type needs to be trivially convertible to the vector + component type. This should always be the case. */ +- if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op))) ++ elem_type = TREE_TYPE (vectype); ++ if (!useless_type_conversion_p (elem_type, TREE_TYPE (op))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "??? operands of different types"); +@@ -3487,9 +3838,14 @@ + { + strided_store = true; + first_stmt = DR_GROUP_FIRST_DR (stmt_info); +- if (!vect_strided_store_supported (vectype) +- && !PURE_SLP_STMT (stmt_info) && !slp) +- return false; ++ if (!slp && !PURE_SLP_STMT (stmt_info)) ++ { ++ group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); ++ if (vect_store_lanes_supported (vectype, group_size)) ++ store_lanes_p = true; ++ else if (!vect_strided_store_supported (vectype, group_size)) ++ return false; ++ } -@@ -4953,21 +5478,7 @@ + if (first_stmt == stmt) + { +@@ -3515,7 +3871,7 @@ + if (!vec_stmt) /* transformation not required. 
*/ + { + STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; +- vect_model_store_cost (stmt_info, ncopies, dt, NULL); ++ vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL); + return true; } - if (vec_stmt) -- { -- STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; -- orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); -- if (orig_stmt_in_pattern) -- { -- stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); -- /* STMT was inserted by the vectorizer to replace a computation idiom. -- ORIG_STMT_IN_PATTERN is a stmt in the original sequence that -- computed this idiom. We need to record a pointer to VEC_STMT in -- the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the -- documentation of vect_pattern_recog. */ -- if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) -- STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; +@@ -3549,6 +3905,7 @@ + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); + first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); ++ op = gimple_assign_rhs1 (first_stmt); + } + else + /* VEC_NUM is the number of vect stmts to be created for this +@@ -3570,6 +3927,16 @@ + + alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); + gcc_assert (alignment_support_scheme); ++ /* Targets with store-lane instructions must not require explicit ++ realignment. */ ++ gcc_assert (!store_lanes_p ++ || alignment_support_scheme == dr_aligned ++ || alignment_support_scheme == dr_unaligned_supported); ++ ++ if (store_lanes_p) ++ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); ++ else ++ aggr_type = vectype; + + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -3621,8 +3988,8 @@ + if (slp) + { + /* Get vectorized arguments for SLP_NODE. 
*/ +- vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds, +- NULL, -1); ++ vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, ++ NULL, slp_node, -1); + + vec_oprnd = VEC_index (tree, vec_oprnds, 0); + } +@@ -3658,9 +4025,9 @@ + /* We should have catched mismatched types earlier. */ + gcc_assert (useless_type_conversion_p (vectype, + TREE_TYPE (vec_oprnd))); +- dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE, +- &dummy, &ptr_incr, false, +- &inv_p); ++ dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL, ++ NULL_TREE, &dummy, ++ &ptr_incr, false, &inv_p); + gcc_assert (bb_vinfo || !inv_p); + } + else +@@ -3681,76 +4048,101 @@ + VEC_replace(tree, dr_chain, i, vec_oprnd); + VEC_replace(tree, oprnds, i, vec_oprnd); + } +- dataref_ptr = +- bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, ++ TYPE_SIZE_UNIT (aggr_type)); + } + +- if (strided_store) ++ if (store_lanes_p) + { +- result_chain = VEC_alloc (tree, heap, group_size); +- /* Permute. */ +- if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi, +- &result_chain)) +- return false; - } -- } -+ STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; ++ tree vec_array; - return is_store; - } -@@ -5017,6 +5528,7 @@ - STMT_VINFO_VECTORIZABLE (res) = true; - STMT_VINFO_IN_PATTERN_P (res) = false; - STMT_VINFO_RELATED_STMT (res) = NULL; -+ STMT_VINFO_PATTERN_DEF_STMT (res) = NULL; - STMT_VINFO_DATA_REF (res) = NULL; +- next_stmt = first_stmt; +- for (i = 0; i < vec_num; i++) +- { +- struct ptr_info_def *pi; +- +- if (i > 0) +- /* Bump the vector pointer. */ +- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, +- NULL_TREE); +- +- if (slp) +- vec_oprnd = VEC_index (tree, vec_oprnds, i); +- else if (strided_store) +- /* For strided stores vectorized defs are interleaved in +- vect_permute_store_chain(). 
*/ +- vec_oprnd = VEC_index (tree, result_chain, i); +- +- data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- pi = get_ptr_info (dataref_ptr); +- pi->align = TYPE_ALIGN_UNIT (vectype); +- if (aligned_access_p (first_dr)) +- pi->misalign = 0; +- else if (DR_MISALIGNMENT (first_dr) == -1) +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); +- pi->misalign = 0; +- } +- else ++ /* Combine all the vectors into an array. */ ++ vec_array = create_vector_array (vectype, vec_num); ++ for (i = 0; i < vec_num; i++) + { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->misalign = DR_MISALIGNMENT (first_dr); ++ vec_oprnd = VEC_index (tree, dr_chain, i); ++ write_vector_array (stmt, gsi, vec_oprnd, vec_array, i); + } + +- /* Arguments are ready. Create the new vector stmt. */ +- new_stmt = gimple_build_assign (data_ref, vec_oprnd); ++ /* Emit: ++ MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ ++ data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); ++ new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); ++ gimple_call_set_lhs (new_stmt, data_ref); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); ++ } ++ else ++ { ++ new_stmt = NULL; ++ if (strided_store) ++ { ++ result_chain = VEC_alloc (tree, heap, group_size); ++ /* Permute. */ ++ vect_permute_store_chain (dr_chain, group_size, stmt, gsi, ++ &result_chain); ++ } + +- if (slp) +- continue; ++ next_stmt = first_stmt; ++ for (i = 0; i < vec_num; i++) ++ { ++ struct ptr_info_def *pi; + +- if (j == 0) +- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; ++ if (i > 0) ++ /* Bump the vector pointer. 
*/ ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, ++ stmt, NULL_TREE); ++ ++ if (slp) ++ vec_oprnd = VEC_index (tree, vec_oprnds, i); ++ else if (strided_store) ++ /* For strided stores vectorized defs are interleaved in ++ vect_permute_store_chain(). */ ++ vec_oprnd = VEC_index (tree, result_chain, i); ++ ++ data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ pi = get_ptr_info (dataref_ptr); ++ pi->align = TYPE_ALIGN_UNIT (vectype); ++ if (aligned_access_p (first_dr)) ++ pi->misalign = 0; ++ else if (DR_MISALIGNMENT (first_dr) == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->align = TYPE_ALIGN_UNIT (elem_type); ++ pi->misalign = 0; ++ } ++ else ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->misalign = DR_MISALIGNMENT (first_dr); ++ } ++ ++ /* Arguments are ready. Create the new vector stmt. 
*/ ++ new_stmt = gimple_build_assign (data_ref, vec_oprnd); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); ++ ++ if (slp) ++ continue; ++ ++ next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); ++ if (!next_stmt) ++ break; ++ } ++ } ++ if (!slp) ++ { ++ if (j == 0) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; +- + prev_stmt_info = vinfo_for_stmt (new_stmt); +- next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); +- if (!next_stmt) +- break; + } + } + +@@ -3861,6 +4253,7 @@ + bool nested_in_vect_loop = false; + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; + tree vectype = STMT_VINFO_VECTYPE (stmt_info); ++ tree elem_type; + tree new_temp; + enum machine_mode mode; + gimple new_stmt = NULL; +@@ -3877,6 +4270,7 @@ + gimple phi = NULL; + VEC(tree,heap) *dr_chain = NULL; + bool strided_load = false; ++ bool load_lanes_p = false; + gimple first_stmt; + tree scalar_type; + bool inv_p; +@@ -3889,6 +4283,7 @@ + enum tree_code code; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + int vf; ++ tree aggr_type; + + if (loop_vinfo) + { +@@ -3965,7 +4360,8 @@ + + /* The vector component type needs to be trivially convertible to the + scalar lhs. This should always be the case. */ +- if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype))) ++ elem_type = TREE_TYPE (vectype); ++ if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "??? operands of different types"); +@@ -3979,10 +4375,15 @@ + /* FORNOW */ + gcc_assert (! nested_in_vect_loop); - STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; -@@ -5345,8 +5857,12 @@ - || *dt == vect_nested_cycle) +- /* Check if interleaving is supported. 
*/ +- if (!vect_strided_load_supported (vectype) +- && !PURE_SLP_STMT (stmt_info) && !slp) +- return false; ++ first_stmt = DR_GROUP_FIRST_DR (stmt_info); ++ if (!slp && !PURE_SLP_STMT (stmt_info)) ++ { ++ group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); ++ if (vect_load_lanes_supported (vectype, group_size)) ++ load_lanes_p = true; ++ else if (!vect_strided_load_supported (vectype, group_size)) ++ return false; ++ } + } + + if (negative) +@@ -4007,18 +4408,23 @@ + if (!vec_stmt) /* transformation not required. */ { - stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); -- if (STMT_VINFO_IN_PATTERN_P (stmt_info)) -+ -+ if (STMT_VINFO_IN_PATTERN_P (stmt_info) -+ && !STMT_VINFO_RELEVANT (stmt_info) -+ && !STMT_VINFO_LIVE_P (stmt_info)) - stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); -+ - *vectype = STMT_VINFO_VECTYPE (stmt_info); - gcc_assert (*vectype != NULL_TREE); + STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; +- vect_model_load_cost (stmt_info, ncopies, NULL); ++ vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL); + return true; } -@@ -5395,7 +5911,7 @@ - { - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); -- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); -+ struct loop *vect_loop = NULL; - bool ordered_p; - enum machine_mode vec_mode; - enum insn_code icode1, icode2; -@@ -5404,6 +5920,9 @@ - tree wide_vectype = vectype_out; - enum tree_code c1, c2; -+ if (loop_info) -+ vect_loop = LOOP_VINFO_LOOP (loop_info); -+ - /* The result of a vectorized widening operation usually requires two vectors - (because the widened results do not fit int one vector). The generated - vector results would normally be expected to be generated in the same -@@ -5424,7 +5943,8 @@ - iterations in parallel). We therefore don't allow to change the order - of the computation in the inner-loop during outer-loop vectorization. 
*/ + if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "transform load."); ++ fprintf (vect_dump, "transform load. ncopies = %d", ncopies); -- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction -+ if (vect_loop -+ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction - && !nested_in_vect_loop_p (vect_loop, stmt)) - ordered_p = false; - else -@@ -5461,6 +5981,19 @@ - } - break; + /** Transform. **/ -+ case WIDEN_LSHIFT_EXPR: -+ if (BYTES_BIG_ENDIAN) -+ { -+ c1 = VEC_WIDEN_LSHIFT_HI_EXPR; -+ c2 = VEC_WIDEN_LSHIFT_LO_EXPR; -+ } -+ else -+ { -+ c2 = VEC_WIDEN_LSHIFT_HI_EXPR; -+ c1 = VEC_WIDEN_LSHIFT_LO_EXPR; -+ } -+ break; + if (strided_load) + { + first_stmt = DR_GROUP_FIRST_DR (stmt_info); ++ if (slp ++ && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance) ++ && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0)) ++ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); + - CASE_CONVERT: - if (BYTES_BIG_ENDIAN) - { ---- a/src/gcc/tree-vectorizer.h -+++ b/src/gcc/tree-vectorizer.h -@@ -73,15 +73,15 @@ - /************************************************************************ - SLP - ************************************************************************/ -+typedef void *slp_void_p; -+DEF_VEC_P (slp_void_p); -+DEF_VEC_ALLOC_P (slp_void_p, heap); + /* Check if the chain of loads is already vectorized. */ + if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))) + { +@@ -4038,8 +4444,6 @@ + } + else + vec_num = group_size; +- +- dr_chain = VEC_alloc (tree, heap, vec_num); + } + else + { +@@ -4050,6 +4454,11 @@ --/* A computation tree of an SLP instance. Each node corresponds to a group of -+/* A computation tree of an SLP instance. Each node corresponds to a group of - stmts to be packed in a SIMD stmt. */ - typedef struct _slp_tree { -- /* Only binary and unary operations are supported. LEFT child corresponds to -- the first operand and RIGHT child to the second if the operation is -- binary. 
*/ -- struct _slp_tree *left; -- struct _slp_tree *right; -+ /* Nodes that contain def-stmts of this node statements operands. */ -+ VEC (slp_void_p, heap) *children; - /* A group of scalar stmts to be vectorized together. */ - VEC (gimple, heap) *stmts; - /* Vectorized stmt/s. */ -@@ -146,14 +146,32 @@ - #define SLP_INSTANCE_LOADS(S) (S)->loads - #define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load + alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false); + gcc_assert (alignment_support_scheme); ++ /* Targets with load-lane instructions must not require explicit ++ realignment. */ ++ gcc_assert (!load_lanes_p ++ || alignment_support_scheme == dr_aligned ++ || alignment_support_scheme == dr_unaligned_supported); --#define SLP_TREE_LEFT(S) (S)->left --#define SLP_TREE_RIGHT(S) (S)->right -+#define SLP_TREE_CHILDREN(S) (S)->children - #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts - #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts - #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size - #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop - #define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to generate +@@ -4181,208 +4590,252 @@ + if (negative) + offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1); -+/* This structure is used in creation of an SLP tree. Each instance -+ corresponds to the same operand in a group of scalar stmts in an SLP -+ node. */ -+typedef struct _slp_oprnd_info -+{ -+ /* Def-stmts for the operands. */ -+ VEC (gimple, heap) *def_stmts; -+ /* Information about the first statement, its vector def-type, type, the -+ operand itself in case it's constant, and an indication if it's a pattern -+ stmt. 
*/ -+ enum vect_def_type first_dt; -+ tree first_def_type; -+ tree first_const_oprnd; -+ bool first_pattern; -+} *slp_oprnd_info; ++ if (load_lanes_p) ++ aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); ++ else ++ aggr_type = vectype; + -+DEF_VEC_P(slp_oprnd_info); -+DEF_VEC_ALLOC_P(slp_oprnd_info, heap); + prev_stmt_info = NULL; + for (j = 0; j < ncopies; j++) + { + /* 1. Create the vector pointer update chain. */ + if (j == 0) +- dataref_ptr = vect_create_data_ref_ptr (first_stmt, ++ dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, + at_loop, offset, + &dummy, &ptr_incr, false, + &inv_p); + else +- dataref_ptr = +- bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE); ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, ++ TYPE_SIZE_UNIT (aggr_type)); + ++ if (strided_load || slp_perm) ++ dr_chain = VEC_alloc (tree, heap, vec_num); - typedef struct _vect_peel_info - { -@@ -464,6 +482,9 @@ - pattern). */ - gimple related_stmt; +- for (i = 0; i < vec_num; i++) ++ if (load_lanes_p) + { +- if (i > 0) +- dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, +- NULL_TREE); ++ tree vec_array; -+ /* Used to keep a def stmt of a pattern stmt if such exists. */ -+ gimple pattern_def_stmt; -+ - /* List of datarefs that are known to have the same alignment as the dataref - of this stmt. */ - VEC(dr_p,heap) *same_align_refs; -@@ -531,6 +552,7 @@ +- /* 2. Create the vector-load in the loop. 
*/ +- switch (alignment_support_scheme) +- { +- case dr_aligned: +- case dr_unaligned_supported: +- { +- struct ptr_info_def *pi; +- data_ref +- = build2 (MEM_REF, vectype, dataref_ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- pi = get_ptr_info (dataref_ptr); +- pi->align = TYPE_ALIGN_UNIT (vectype); +- if (alignment_support_scheme == dr_aligned) +- { +- gcc_assert (aligned_access_p (first_dr)); +- pi->misalign = 0; +- } +- else if (DR_MISALIGNMENT (first_dr) == -1) +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); +- pi->misalign = 0; +- } +- else +- { +- TREE_TYPE (data_ref) +- = build_aligned_type (TREE_TYPE (data_ref), +- TYPE_ALIGN (TREE_TYPE (vectype))); +- pi->misalign = DR_MISALIGNMENT (first_dr); +- } +- break; +- } +- case dr_explicit_realign: +- { +- tree ptr, bump; +- tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); ++ vec_array = create_vector_array (vectype, vec_num); - #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p - #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt -+#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt - #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs - #define STMT_VINFO_DEF_TYPE(S) (S)->def_type - #define STMT_VINFO_DR_GROUP_FIRST_DR(S) (S)->first_dr -@@ -794,9 +816,9 @@ - extern tree vectorizable_function (gimple, tree, tree); - extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *, - slp_tree); --extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type, -- slp_tree); --extern void vect_model_load_cost (stmt_vec_info, int, slp_tree); -+extern void vect_model_store_cost (stmt_vec_info, int, bool, -+ enum vect_def_type, slp_tree); -+extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree); - extern void vect_finish_stmt_generation (gimple, gimple, - gimple_stmt_iterator *); - extern bool 
vect_mark_stmts_to_be_vectorized (loop_vec_info); -@@ -810,10 +832,13 @@ - extern void vect_remove_stores (gimple); - extern bool vect_analyze_stmt (gimple, bool *, slp_tree); - extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, -- tree, int); -+ tree, int, slp_tree); - extern void vect_get_load_cost (struct data_reference *, int, bool, - unsigned int *, unsigned int *); - extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); -+extern bool vect_supportable_shift (enum tree_code, tree); -+extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **, -+ VEC (tree, heap) **, slp_tree, int); +- if (compute_in_loop) +- msq = vect_setup_realignment (first_stmt, gsi, +- &realignment_token, +- dr_explicit_realign, +- dataref_ptr, NULL); +- +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, dataref_ptr, +- build_int_cst +- (TREE_TYPE (dataref_ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, ptr); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- vec_dest = vect_create_destination_var (scalar_dest, vectype); +- new_stmt = gimple_build_assign (vec_dest, data_ref); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- msq = new_temp; +- +- bump = size_binop (MULT_EXPR, vs_minus_1, +- TYPE_SIZE_UNIT (scalar_type)); +- ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, ptr, +- build_int_cst +- (TREE_TYPE (ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- ptr = make_ssa_name 
(SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, ptr); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, ptr, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- break; +- } +- case dr_explicit_realign_optimized: +- new_stmt = gimple_build_assign_with_ops +- (BIT_AND_EXPR, NULL_TREE, dataref_ptr, +- build_int_cst +- (TREE_TYPE (dataref_ptr), +- -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); +- new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- data_ref +- = build2 (MEM_REF, vectype, new_temp, +- build_int_cst (reference_alias_ptr_type +- (DR_REF (first_dr)), 0)); +- break; +- default: +- gcc_unreachable (); +- } +- vec_dest = vect_create_destination_var (scalar_dest, vectype); +- new_stmt = gimple_build_assign (vec_dest, data_ref); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); ++ /* Emit: ++ VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ ++ data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr); ++ new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); ++ gimple_call_set_lhs (new_stmt, vec_array); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + mark_symbols_for_renaming (new_stmt); - /* In tree-vect-data-refs.c. 
*/ - extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); -@@ -829,21 +854,22 @@ - extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info); - extern bool vect_prune_runtime_alias_test_list (loop_vec_info); - extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *); --extern tree vect_create_data_ref_ptr (gimple, struct loop *, tree, tree *, -- gimple *, bool, bool *); -+extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree, -+ tree *, gimple *, bool, bool *); - extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree); - extern tree vect_create_destination_var (tree, tree); --extern bool vect_strided_store_supported (tree); --extern bool vect_strided_load_supported (tree); --extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, -+extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT); -+extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); -+extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT); -+extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); -+extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, - gimple_stmt_iterator *, VEC(tree,heap) **); - extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *, - enum dr_alignment_support, tree, - struct loop **); --extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple, -- gimple_stmt_iterator *, VEC(tree,heap) **); --extern bool vect_transform_strided_load (gimple, VEC(tree,heap) *, int, -+extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int, - gimple_stmt_iterator *); -+extern void vect_record_strided_load_vectors (gimple, VEC(tree,heap) *); - extern int vect_get_place_in_interleaving_chain (gimple, gimple); - extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); - extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, -@@ 
-879,8 +905,9 @@ - extern bool vect_analyze_slp (loop_vec_info, bb_vec_info); - extern void vect_make_slp_decision (loop_vec_info); - extern void vect_detect_hybrid_slp (loop_vec_info); --extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **, -- VEC (tree,heap) **, int); -+extern void vect_get_slp_defs (VEC (tree, heap) *, slp_tree, -+ VEC (slp_void_p, heap) **, int); +- /* 3. Handle explicit realignment if necessary/supported. Create in +- loop: vec_dest = realign_load (msq, lsq, realignment_token) */ +- if (alignment_support_scheme == dr_explicit_realign_optimized +- || alignment_support_scheme == dr_explicit_realign) ++ /* Extract each vector into an SSA_NAME. */ ++ for (i = 0; i < vec_num; i++) ++ { ++ new_temp = read_vector_array (stmt, gsi, scalar_dest, ++ vec_array, i); ++ VEC_quick_push (tree, dr_chain, new_temp); ++ } + - extern LOC find_bb_location (basic_block); - extern bb_vec_info vect_slp_analyze_bb (basic_block); - extern void vect_slp_transform_bb (basic_block); -@@ -889,9 +916,9 @@ - /* Pattern recognition functions. - Additional pattern recognition functions can (and will) be added - in the future. */ --typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); --#define NUM_PATTERNS 4 --void vect_pattern_recog (loop_vec_info); -+typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); -+#define NUM_PATTERNS 7 -+void vect_pattern_recog (loop_vec_info, bb_vec_info); ++ /* Record the mapping between SSA_NAMEs and statements. */ ++ vect_record_strided_load_vectors (stmt, dr_chain); ++ } ++ else ++ { ++ for (i = 0; i < vec_num; i++) + { +- tree tmp; ++ if (i > 0) ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, ++ stmt, NULL_TREE); + +- lsq = gimple_assign_lhs (new_stmt); +- if (!realignment_token) +- realignment_token = dataref_ptr; ++ /* 2. Create the vector-load in the loop. 
*/ ++ switch (alignment_support_scheme) ++ { ++ case dr_aligned: ++ case dr_unaligned_supported: ++ { ++ struct ptr_info_def *pi; ++ data_ref ++ = build2 (MEM_REF, vectype, dataref_ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ pi = get_ptr_info (dataref_ptr); ++ pi->align = TYPE_ALIGN_UNIT (vectype); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ gcc_assert (aligned_access_p (first_dr)); ++ pi->misalign = 0; ++ } ++ else if (DR_MISALIGNMENT (first_dr) == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->align = TYPE_ALIGN_UNIT (elem_type); ++ pi->misalign = 0; ++ } ++ else ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ pi->misalign = DR_MISALIGNMENT (first_dr); ++ } ++ break; ++ } ++ case dr_explicit_realign: ++ { ++ tree ptr, bump; ++ tree vs_minus_1 ++ = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); ++ ++ if (compute_in_loop) ++ msq = vect_setup_realignment (first_stmt, gsi, ++ &realignment_token, ++ dr_explicit_realign, ++ dataref_ptr, NULL); ++ ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, dataref_ptr, ++ build_int_cst ++ (TREE_TYPE (dataref_ptr), ++ -(HOST_WIDE_INT) ++ TYPE_ALIGN_UNIT (vectype))); ++ ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); ++ gimple_assign_set_lhs (new_stmt, ptr); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ vec_dest = vect_create_destination_var (scalar_dest, ++ vectype); ++ new_stmt = gimple_build_assign (vec_dest, data_ref); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ msq = new_temp; ++ ++ 
bump = size_binop (MULT_EXPR, vs_minus_1, ++ TYPE_SIZE_UNIT (scalar_type)); ++ ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, ptr, ++ build_int_cst ++ (TREE_TYPE (ptr), ++ -(HOST_WIDE_INT) ++ TYPE_ALIGN_UNIT (vectype))); ++ ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt); ++ gimple_assign_set_lhs (new_stmt, ptr); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, ptr, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ break; ++ } ++ case dr_explicit_realign_optimized: ++ new_stmt = gimple_build_assign_with_ops ++ (BIT_AND_EXPR, NULL_TREE, dataref_ptr, ++ build_int_cst ++ (TREE_TYPE (dataref_ptr), ++ -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); ++ new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), ++ new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ data_ref ++ = build2 (MEM_REF, vectype, new_temp, ++ build_int_cst (reference_alias_ptr_type ++ (DR_REF (first_dr)), 0)); ++ break; ++ default: ++ gcc_unreachable (); ++ } + vec_dest = vect_create_destination_var (scalar_dest, vectype); +- tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, +- realignment_token); +- new_stmt = gimple_build_assign (vec_dest, tmp); ++ new_stmt = gimple_build_assign (vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ mark_symbols_for_renaming (new_stmt); - /* In tree-vectorizer.c. */ - unsigned vectorize_loops (void); ---- a/src/gcc/tree.c -+++ b/src/gcc/tree.c -@@ -7321,6 +7321,15 @@ - return build_array_type_1 (elt_type, index_type, false); - } +- if (alignment_support_scheme == dr_explicit_realign_optimized) ++ /* 3. Handle explicit realignment if necessary/supported. 
++ Create in loop: ++ vec_dest = realign_load (msq, lsq, realignment_token) */ ++ if (alignment_support_scheme == dr_explicit_realign_optimized ++ || alignment_support_scheme == dr_explicit_realign) + { +- gcc_assert (phi); +- if (i == vec_num - 1 && j == ncopies - 1) +- add_phi_arg (phi, lsq, loop_latch_edge (containing_loop), +- UNKNOWN_LOCATION); +- msq = lsq; +- } +- } ++ tree tmp; -+/* Return a representation of ELT_TYPE[NELTS], using indices of type -+ sizetype. */ -+ -+tree -+build_array_type_nelts (tree elt_type, unsigned HOST_WIDE_INT nelts) -+{ -+ return build_array_type (elt_type, build_index_type (size_int (nelts - 1))); -+} -+ - /* Recursively examines the array elements of TYPE, until a non-array - element type is found. */ +- /* 4. Handle invariant-load. */ +- if (inv_p && !bb_vinfo) +- { +- gcc_assert (!strided_load); +- gcc_assert (nested_in_vect_loop_p (loop, stmt)); +- if (j == 0) +- { +- int k; +- tree t = NULL_TREE; +- tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); +- +- /* CHECKME: bitpos depends on endianess? */ +- bitpos = bitsize_zero_node; +- vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, +- bitsize, bitpos); +- vec_dest = +- vect_create_destination_var (scalar_dest, NULL_TREE); +- new_stmt = gimple_build_assign (vec_dest, vec_inv); +- new_temp = make_ssa_name (vec_dest, new_stmt); ++ lsq = gimple_assign_lhs (new_stmt); ++ if (!realignment_token) ++ realignment_token = dataref_ptr; ++ vec_dest = vect_create_destination_var (scalar_dest, vectype); ++ tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, ++ realignment_token); ++ new_stmt = gimple_build_assign (vec_dest, tmp); ++ new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); ---- a/src/gcc/tree.def -+++ b/src/gcc/tree.def -@@ -1092,6 +1092,19 @@ - is subtracted from t3. 
*/ - DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3) +- for (k = nunits - 1; k >= 0; --k) +- t = tree_cons (NULL_TREE, new_temp, t); +- /* FIXME: use build_constructor directly. */ +- vec_inv = build_constructor_from_list (vectype, t); +- new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); +- new_stmt = SSA_NAME_DEF_STMT (new_temp); ++ if (alignment_support_scheme == dr_explicit_realign_optimized) ++ { ++ gcc_assert (phi); ++ if (i == vec_num - 1 && j == ncopies - 1) ++ add_phi_arg (phi, lsq, ++ loop_latch_edge (containing_loop), ++ UNKNOWN_LOCATION); ++ msq = lsq; ++ } + } +- else +- gcc_unreachable (); /* FORNOW. */ +- } -+/* Widening shift left. -+ The first operand is of type t1. -+ The second operand is the number of bits to shift by; it need not be the -+ same type as the first operand and result. -+ Note that the result is undefined if the second operand is larger -+ than or equal to the first operand's type size. -+ The type of the entire expression is t2, such that t2 is at least twice -+ the size of t1. -+ WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting) -+ the first argument from type t1 to type t2, and then shifting it -+ by the second argument. */ -+DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2) +- if (negative) +- { +- new_temp = reverse_vec_elements (new_temp, stmt, gsi); +- new_stmt = SSA_NAME_DEF_STMT (new_temp); +- } ++ /* 4. Handle invariant-load. */ ++ if (inv_p && !bb_vinfo) ++ { ++ gcc_assert (!strided_load); ++ gcc_assert (nested_in_vect_loop_p (loop, stmt)); ++ if (j == 0) ++ { ++ int k; ++ tree t = NULL_TREE; ++ tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type); + - /* Fused multiply-add. - All operands and the result are of the same type. 
No intermediate - rounding is performed after multiplying operand one with operand two -@@ -1147,6 +1160,16 @@ - DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2) - DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2) - -+/* Widening vector shift left in bits. -+ Operand 0 is a vector to be shifted with N elements of size S. -+ Operand 1 is an integer shift amount in bits. -+ The result of the operation is N elements of size 2*S. -+ VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results. -+ VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results. -+ */ -+DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2) -+DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2) ++ /* CHECKME: bitpos depends on endianess? */ ++ bitpos = bitsize_zero_node; ++ vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp, ++ bitsize, bitpos); ++ vec_dest = vect_create_destination_var (scalar_dest, ++ NULL_TREE); ++ new_stmt = gimple_build_assign (vec_dest, vec_inv); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); + - /* PREDICT_EXPR. Specify hint for branch prediction. The - PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the - outcome (0 for not taken and 1 for taken). Once the profile is guessed ---- a/src/gcc/tree.h -+++ b/src/gcc/tree.h -@@ -4197,6 +4197,7 @@ - extern tree build_index_type (tree); - extern tree build_array_type (tree, tree); - extern tree build_nonshared_array_type (tree, tree); -+extern tree build_array_type_nelts (tree, unsigned HOST_WIDE_INT); - extern tree build_function_type (tree, tree); - extern tree build_function_type_list (tree, ...); - extern tree build_function_type_skip_args (tree, bitmap); -@@ -4626,21 +4627,10 @@ ++ for (k = nunits - 1; k >= 0; --k) ++ t = tree_cons (NULL_TREE, new_temp, t); ++ /* FIXME: use build_constructor directly. 
*/ ++ vec_inv = build_constructor_from_list (vectype, t); ++ new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); ++ new_stmt = SSA_NAME_DEF_STMT (new_temp); ++ } ++ else ++ gcc_unreachable (); /* FORNOW. */ ++ } - extern VEC(tree,gc) *ctor_to_vec (tree); +- /* Collect vector loads and later create their permutation in +- vect_transform_strided_load (). */ +- if (strided_load || slp_perm) +- VEC_quick_push (tree, dr_chain, new_temp); ++ if (negative) ++ { ++ new_temp = reverse_vec_elements (new_temp, stmt, gsi); ++ new_stmt = SSA_NAME_DEF_STMT (new_temp); ++ } --/* Examine CTOR to discover: -- * how many scalar fields are set to nonzero values, -- and place it in *P_NZ_ELTS; -- * how many scalar fields in total are in CTOR, -- and place it in *P_ELT_COUNT. -- * if a type is a union, and the initializer from the constructor -- is not the largest element in the union, then set *p_must_clear. -+extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, -+ HOST_WIDE_INT *, bool *); +- /* Store vector loads in the corresponding SLP_NODE. */ +- if (slp && !slp_perm) +- VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); ++ /* Collect vector loads and later create their permutation in ++ vect_transform_strided_load (). */ ++ if (strided_load || slp_perm) ++ VEC_quick_push (tree, dr_chain, new_temp); ++ ++ /* Store vector loads in the corresponding SLP_NODE. */ ++ if (slp && !slp_perm) ++ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), ++ new_stmt); ++ } + } -- Return whether or not CTOR is a valid static constant initializer, the same -- as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". 
*/ -- --extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *, -- bool *); + if (slp && !slp_perm) +@@ -4401,12 +4854,9 @@ + { + if (strided_load) + { +- if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi)) +- return false; - --extern HOST_WIDE_INT count_type_elements (const_tree, bool); -+extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree); - - /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0. */ - ---- a/src/gcc/value-prof.c -+++ b/src/gcc/value-prof.c -@@ -1252,6 +1252,9 @@ - if (TREE_CODE (callee) == FUNCTION_DECL) - return false; ++ if (!load_lanes_p) ++ vect_transform_strided_load (stmt, dr_chain, group_size, gsi); + *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); +- VEC_free (tree, heap, dr_chain); +- dr_chain = VEC_alloc (tree, heap, group_size); + } + else + { +@@ -4417,11 +4867,10 @@ + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + } ++ if (dr_chain) ++ VEC_free (tree, heap, dr_chain); + } -+ if (gimple_call_internal_p (stmt)) -+ return false; -+ - histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_INDIR_CALL); - if (!histogram) - return false; -@@ -1640,6 +1643,7 @@ - tree callee; +- if (dr_chain) +- VEC_free (tree, heap, dr_chain); +- + return true; + } - if (gimple_code (stmt) != GIMPLE_CALL -+ || gimple_call_internal_p (stmt) - || gimple_call_fndecl (stmt) != NULL_TREE) - return; +@@ -4435,7 +4884,7 @@ + condition operands are supportable using vec_is_simple_use. */ ---- a/src/libada/ChangeLog -+++ b/src/libada/ChangeLog -@@ -1,3 +1,9 @@ -+2011-11-13 Iain Sandoe -+ -+ Backport from mainline r181319 -+ Makefile.in: Change dependency on oscons to depend on the generator -+ tool. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/libada/Makefile.in -+++ b/src/libada/Makefile.in -@@ -70,6 +70,7 @@ - version := $(shell cat $(srcdir)/../gcc/BASE-VER) - libsubdir := $(libdir)/gcc/$(target_noncanonical)/$(version)$(MULTISUBDIR) - ADA_RTS_DIR=$(GCC_DIR)/ada/rts$(subst /,_,$(MULTISUBDIR)) -+ADA_RTS_SUBDIR=./rts$(subst /,_,$(MULTISUBDIR)) - - # exeext should not be used because it's the *host* exeext. We're building - # a *target* library, aren't we?!? Likewise for CC. Still, provide bogus -@@ -96,10 +97,10 @@ - "CFLAGS=$(CFLAGS) $(WARN_CFLAGS)" - - # Rules to build gnatlib. --.PHONY: gnatlib gnatlib-plain gnatlib-sjlj gnatlib-zcx gnatlib-shared oscons -+.PHONY: gnatlib gnatlib-plain gnatlib-sjlj gnatlib-zcx gnatlib-shared osconstool - gnatlib: @default_gnatlib_target@ - --gnatlib-plain: oscons $(GCC_DIR)/ada/Makefile -+gnatlib-plain: osconstool $(GCC_DIR)/ada/Makefile - test -f stamp-libada || \ - $(MAKE) -C $(GCC_DIR)/ada $(LIBADA_FLAGS_TO_PASS) gnatlib \ - && touch stamp-libada -@@ -108,7 +109,7 @@ - $(LN_S) $(ADA_RTS_DIR) adainclude - $(LN_S) $(ADA_RTS_DIR) adalib - --gnatlib-sjlj gnatlib-zcx gnatlib-shared: oscons $(GCC_DIR)/ada/Makefile -+gnatlib-sjlj gnatlib-zcx gnatlib-shared: osconstool $(GCC_DIR)/ada/Makefile - test -f stamp-libada || \ - $(MAKE) -C $(GCC_DIR)/ada $(LIBADA_FLAGS_TO_PASS) $@ \ - && touch stamp-libada -@@ -117,8 +118,8 @@ - $(LN_S) $(ADA_RTS_DIR) adainclude - $(LN_S) $(ADA_RTS_DIR) adalib - --oscons: -- $(MAKE) -C $(GCC_DIR) $(LIBADA_FLAGS_TO_PASS) ada/s-oscons.ads -+osconstool: -+ $(MAKE) -C $(GCC_DIR)/ada $(LIBADA_FLAGS_TO_PASS) ./bldtools/oscons/xoscons - - install-gnatlib: $(GCC_DIR)/ada/Makefile - $(MAKE) -C $(GCC_DIR)/ada $(LIBADA_FLAGS_TO_PASS) install-gnatlib ---- a/src/libffi/ChangeLog -+++ b/src/libffi/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. 
-+ - 2011-10-26 Release Manager + static bool +-vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) ++vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) + { + tree lhs, rhs; + tree def; +@@ -4450,7 +4899,7 @@ + if (TREE_CODE (lhs) == SSA_NAME) + { + gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); +- if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def, ++ if (!vect_is_simple_use (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def, + &dt)) + return false; + } +@@ -4461,7 +4910,7 @@ + if (TREE_CODE (rhs) == SSA_NAME) + { + gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); +- if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def, ++ if (!vect_is_simple_use (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def, + &dt)) + return false; + } +@@ -4487,7 +4936,8 @@ - * GCC 4.6.2 released. ---- a/src/libffi/configure -+++ b/src/libffi/configure -@@ -9001,7 +9001,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -9914,7 +9914,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -9932,7 +9932,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libgcc/ChangeLog -+++ b/src/libgcc/ChangeLog -@@ -1,3 +1,8 @@ -+2011-11-23 Gerald Pfeifer -+ -+ * config.host (*-*-freebsd[12], *-*-freebsd[12].*, -+ *-*-freebsd*aout*): Remove. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/libgcc/config.host -+++ b/src/libgcc/config.host -@@ -145,15 +145,6 @@ - asm_hidden_op=.private_extern - tmake_file="t-darwin ${cpu_type}/t-darwin t-slibgcc-darwin" - ;; --*-*-freebsd[12] | *-*-freebsd[12].* | *-*-freebsd*aout*) -- # This is the place-holder for the generic a.out configuration -- # of FreeBSD. No actual configuration resides here since -- # there was only ever a bare-bones ix86 configuration for -- # a.out and it exists solely in the machine-specific section. -- # This place-holder must exist to avoid dropping into -- # the generic ELF configuration of FreeBSD (i.e. it must be -- # ordered before that section). -- ;; - *-*-freebsd*) - # This is the generic ELF configuration of FreeBSD. Later - # machine-specific sections may refine and add to this ---- a/src/libgfortran/ChangeLog -+++ b/src/libgfortran/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ - 2011-10-26 Release Manager + bool + vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, +- gimple *vec_stmt, tree reduc_def, int reduc_index) ++ gimple *vec_stmt, tree reduc_def, int reduc_index, ++ slp_tree slp_node) + { + tree scalar_dest = NULL_TREE; + tree vec_dest = NULL_TREE; +@@ -4504,19 +4954,24 @@ + tree def; + enum vect_def_type dt, dts[4]; + int nunits = TYPE_VECTOR_SUBPARTS (vectype); +- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; ++ int ncopies; + enum tree_code code; + stmt_vec_info prev_stmt_info = NULL; +- int j; ++ int i, j; ++ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); ++ VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; ++ VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL; - * GCC 4.6.2 released. ---- a/src/libgfortran/configure -+++ b/src/libgfortran/configure -@@ -10326,7 +10326,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -11242,7 +11242,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -11260,7 +11260,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) -@@ -14162,7 +14162,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - archive_cmds_FC='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct_FC=yes - hardcode_minus_L_FC=yes -@@ -14870,7 +14870,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -14888,7 +14888,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libgomp/ChangeLog -+++ b/src/libgomp/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ - 2011-10-26 Release Manager +- /* FORNOW: unsupported in basic block SLP. */ +- gcc_assert (loop_vinfo); ++ if (slp_node || PURE_SLP_STMT (stmt_info)) ++ ncopies = 1; ++ else ++ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; - * GCC 4.6.2 released. ---- a/src/libgomp/configure -+++ b/src/libgomp/configure -@@ -9317,7 +9317,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -10230,7 +10230,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -10248,7 +10248,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) -@@ -13164,7 +13164,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - archive_cmds_FC='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct_FC=yes - hardcode_minus_L_FC=yes -@@ -13869,7 +13869,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -13887,7 +13887,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libiberty/ChangeLog -+++ b/src/libiberty/ChangeLog -@@ -1,3 +1,17 @@ -+2011-11-13 Iain Sandoe -+ -+ PR target/48108 -+ Backport from mainline r180523 -+ * simple-object-mach-o.c (GNU_WRAPPER_SECTS, GNU_WRAPPER_INDEX, -+ GNU_WRAPPER_NAMES): New macros. -+ (simple_object_mach_o_segment): Handle wrapper scheme. -+ (simple_object_mach_o_write_section_header): Allow the segment name -+ to be supplied. -+ (simple_object_mach_o_write_segment): Handle wrapper scheme. Ensure -+ that the top-level segment name in the load command is empty. -+ (simple_object_mach_o_write_to_file): Determine the number of -+ sections during segment output, use that in writing the header. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/libiberty/simple-object-mach-o.c -+++ b/src/libiberty/simple-object-mach-o.c -@@ -1,5 +1,5 @@ - /* simple-object-mach-o.c -- routines to manipulate Mach-O object files. -- Copyright 2010 Free Software Foundation, Inc. -+ Copyright 2010, 2011 Free Software Foundation, Inc. - Written by Ian Lance Taylor, Google. - - This program is free software; you can redistribute it and/or modify it -@@ -174,6 +174,15 @@ - - #define GNU_SECTION_NAMES "__section_names" - -+/* A GNU-specific extension to wrap multiple sections using three -+ mach-o sections within a given segment. The section '__wrapper_sects' -+ is subdivided according to the index '__wrapper_index' and each sub -+ sect is named according to the names supplied in '__wrapper_names'. */ -+ -+#define GNU_WRAPPER_SECTS "__wrapper_sects" -+#define GNU_WRAPPER_INDEX "__wrapper_index" -+#define GNU_WRAPPER_NAMES "__wrapper_names" -+ - /* Private data for an simple_object_read. */ - - struct simple_object_mach_o_read -@@ -214,7 +223,18 @@ - unsigned int reserved; - }; + gcc_assert (ncopies >= 1); +- if (reduc_index && ncopies > 1) ++ if (reduc_index && (ncopies > 1 || STMT_SLP_TYPE (stmt_info))) + return false; /* FORNOW */ --/* See if we have a Mach-O file. */ -+/* See if we have a Mach-O MH_OBJECT file: -+ -+ A standard MH_OBJECT (from as) will have three load commands: -+ 0 - LC_SEGMENT/LC_SEGMENT64 -+ 1 - LC_SYMTAB -+ 2 - LC_DYSYMTAB -+ -+ The LC_SEGMENT/LC_SEGMENT64 will introduce a single anonymous segment -+ containing all the sections. -+ -+ Files written by simple-object will have only the segment command -+ (no symbol tables). */ +- if (!STMT_VINFO_RELEVANT_P (stmt_info)) ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; - static void * - simple_object_mach_o_match ( -@@ -356,8 +376,29 @@ - } - } + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def +@@ -4524,10 +4979,6 @@ + && reduc_def)) + return false; --/* Handle a segment in a Mach-O file. 
Return 1 if we should continue, -- 0 if the caller should return. */ -+/* Handle a segment in a Mach-O Object file. -+ -+ This will callback to the function pfn for each "section found" the meaning -+ of which depends on gnu extensions to mach-o: -+ -+ If we find mach-o sections (with the segment name as specified) which also -+ contain: a 'sects' wrapper, an index, and a name table, we expand this into -+ as many sections as are specified in the index. In this case, there will -+ be a callback for each of these. -+ -+ We will also allow an extension that permits long names (more than 16 -+ characters) to be used with mach-o. In this case, the section name has -+ a specific format embedding an index into a name table, and the file must -+ contain such name table. -+ -+ Return 1 if we should continue, 0 if the caller should return. */ -+ -+#define SOMO_SECTS_PRESENT 0x01 -+#define SOMO_INDEX_PRESENT 0x02 -+#define SOMO_NAMES_PRESENT 0x04 -+#define SOMO_LONGN_PRESENT 0x08 -+#define SOMO_WRAPPING (SOMO_SECTS_PRESENT | SOMO_INDEX_PRESENT \ -+ | SOMO_NAMES_PRESENT) +- /* FORNOW: SLP not supported. */ +- if (STMT_SLP_TYPE (stmt_info)) +- return false; +- + /* FORNOW: not yet supported. */ + if (STMT_VINFO_LIVE_P (stmt_info)) + { +@@ -4551,7 +5002,7 @@ + then_clause = TREE_OPERAND (op, 1); + else_clause = TREE_OPERAND (op, 2); - static int - simple_object_mach_o_segment (simple_object_read *sobj, off_t offset, -@@ -378,9 +419,20 @@ - unsigned int nsects; - unsigned char *secdata; - unsigned int i; -+ unsigned int gnu_sections_found; - unsigned int strtab_index; -+ unsigned int index_index; -+ unsigned int nametab_index; -+ unsigned int sections_index; - char *strtab; -+ char *nametab; -+ unsigned char *index; - size_t strtab_size; -+ size_t nametab_size; -+ size_t index_size; -+ unsigned int n_wrapped_sects; -+ size_t wrapper_sect_size; -+ off_t wrapper_sect_offset; - - fetch_32 = (omr->is_big_endian - ? 
simple_object_fetch_big_32 -@@ -409,6 +461,8 @@ - nsects)); - } - -+ /* Fetch the section headers from the segment command. */ -+ - secdata = XNEWVEC (unsigned char, nsects * sechdrsize); - if (!simple_object_internal_read (sobj->descriptor, offset + seghdrsize, - secdata, nsects * sechdrsize, errmsg, err)) -@@ -417,9 +471,13 @@ - return 0; - } - -- /* Scan for a __section_names section. This is in effect a GNU -- extension that permits section names longer than 16 chars. */ -+ /* Scan for special sections that signal GNU extensions to the format. */ - -+ gnu_sections_found = 0; -+ index_index = nsects; -+ sections_index = nsects; -+ strtab_index = nsects; -+ nametab_index = nsects; - for (i = 0; i < nsects; ++i) - { - size_t nameoff; -@@ -427,19 +485,104 @@ - nameoff = i * sechdrsize + segname_offset; - if (strcmp ((char *) secdata + nameoff, omr->segment_name) != 0) - continue; -+ - nameoff = i * sechdrsize + sectname_offset; -- if (strcmp ((char *) secdata + nameoff, GNU_SECTION_NAMES) == 0) -- break; -+ if (strcmp ((char *) secdata + nameoff, GNU_WRAPPER_NAMES) == 0) -+ { -+ nametab_index = i; -+ gnu_sections_found |= SOMO_NAMES_PRESENT; -+ } -+ else if (strcmp ((char *) secdata + nameoff, GNU_WRAPPER_INDEX) == 0) -+ { -+ index_index = i; -+ gnu_sections_found |= SOMO_INDEX_PRESENT; -+ } -+ else if (strcmp ((char *) secdata + nameoff, GNU_WRAPPER_SECTS) == 0) -+ { -+ sections_index = i; -+ gnu_sections_found |= SOMO_SECTS_PRESENT; -+ } -+ else if (strcmp ((char *) secdata + nameoff, GNU_SECTION_NAMES) == 0) -+ { -+ strtab_index = i; -+ gnu_sections_found |= SOMO_LONGN_PRESENT; -+ } - } +- if (!vect_is_simple_cond (cond_expr, loop_vinfo)) ++ if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo)) + return false; -- strtab_index = i; -- if (strtab_index >= nsects) -+ /* If any of the special wrapper section components is present, then -+ they all should be. 
*/ -+ -+ if ((gnu_sections_found & SOMO_WRAPPING) != 0) + /* We do not handle two different vector types for the condition +@@ -4563,7 +5014,7 @@ + if (TREE_CODE (then_clause) == SSA_NAME) { -- strtab = NULL; -- strtab_size = 0; -+ off_t nametab_offset; -+ off_t index_offset; -+ -+ if ((gnu_sections_found & SOMO_WRAPPING) != SOMO_WRAPPING) -+ { -+ *errmsg = "GNU Mach-o section wrapper: required section missing"; -+ *err = 0; /* No useful errno. */ -+ XDELETEVEC (secdata); -+ return 0; -+ } -+ -+ /* Fetch the name table. */ -+ -+ simple_object_mach_o_section_info (omr->is_big_endian, is_32, -+ secdata + nametab_index * sechdrsize, -+ &nametab_offset, &nametab_size); -+ nametab = XNEWVEC (char, nametab_size); -+ if (!simple_object_internal_read (sobj->descriptor, -+ sobj->offset + nametab_offset, -+ (unsigned char *) nametab, nametab_size, -+ errmsg, err)) -+ { -+ XDELETEVEC (nametab); -+ XDELETEVEC (secdata); -+ return 0; -+ } -+ -+ /* Fetch the index. */ -+ -+ simple_object_mach_o_section_info (omr->is_big_endian, is_32, -+ secdata + index_index * sechdrsize, -+ &index_offset, &index_size); -+ index = XNEWVEC (unsigned char, index_size); -+ if (!simple_object_internal_read (sobj->descriptor, -+ sobj->offset + index_offset, -+ index, index_size, -+ errmsg, err)) -+ { -+ XDELETEVEC (index); -+ XDELETEVEC (nametab); -+ XDELETEVEC (secdata); -+ return 0; -+ } -+ -+ /* The index contains 4 unsigned ints per sub-section: -+ sub-section offset/length, sub-section name/length. -+ We fix this for both 32 and 64 bit mach-o for now, since -+ other fields limit the maximum size of an object to 4G. */ -+ n_wrapped_sects = index_size / 16; -+ -+ /* Get the parameters for the wrapper too. 
*/ -+ simple_object_mach_o_section_info (omr->is_big_endian, is_32, -+ secdata + sections_index * sechdrsize, -+ &wrapper_sect_offset, -+ &wrapper_sect_size); + gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause); +- if (!vect_is_simple_use (then_clause, loop_vinfo, NULL, ++ if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo, + &then_def_stmt, &def, &dt)) + return false; } - else +@@ -4575,7 +5026,7 @@ + if (TREE_CODE (else_clause) == SSA_NAME) { -+ index = NULL; -+ index_size = 0; -+ nametab = NULL; -+ nametab_size = 0; -+ n_wrapped_sects = 0; -+ } -+ -+ /* If we have a long names section, fetch it. */ -+ -+ if ((gnu_sections_found & SOMO_LONGN_PRESENT) != 0) -+ { - off_t strtab_offset; - - simple_object_mach_o_section_info (omr->is_big_endian, is_32, -@@ -452,52 +595,120 @@ - errmsg, err)) - { - XDELETEVEC (strtab); -+ XDELETEVEC (index); -+ XDELETEVEC (nametab); - XDELETEVEC (secdata); - return 0; - } + gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause); +- if (!vect_is_simple_use (else_clause, loop_vinfo, NULL, ++ if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo, + &else_def_stmt, &def, &dt)) + return false; } -+ else +@@ -4593,7 +5044,15 @@ + return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode); + } + +- /* Transform */ ++ /* Transform. */ ++ ++ if (!slp_node) + { -+ strtab = NULL; -+ strtab_size = 0; -+ strtab_index = nsects; ++ vec_oprnds0 = VEC_alloc (tree, heap, 1); ++ vec_oprnds1 = VEC_alloc (tree, heap, 1); ++ vec_oprnds2 = VEC_alloc (tree, heap, 1); ++ vec_oprnds3 = VEC_alloc (tree, heap, 1); + } - /* Process the sections. */ - - for (i = 0; i < nsects; ++i) + /* Handle def. */ + scalar_dest = gimple_assign_lhs (stmt); +@@ -4602,67 +5061,118 @@ + /* Handle cond expr. 
*/ + for (j = 0; j < ncopies; j++) { - const unsigned char *sechdr; -- char namebuf[MACH_O_NAME_LEN + 1]; -+ char namebuf[MACH_O_NAME_LEN * 2 + 2]; - char *name; - off_t secoffset; - size_t secsize; -+ int l; -+ -+ sechdr = secdata + i * sechdrsize; -+ -+ /* We've already processed the long section names. */ - -- if (i == strtab_index) -+ if ((gnu_sections_found & SOMO_LONGN_PRESENT) != 0 -+ && i == strtab_index) - continue; - -- sechdr = secdata + i * sechdrsize; -+ /* We only act on the segment named. */ - - if (strcmp ((char *) sechdr + segname_offset, omr->segment_name) != 0) - continue; - -- memcpy (namebuf, sechdr + sectname_offset, MACH_O_NAME_LEN); -- namebuf[MACH_O_NAME_LEN] = '\0'; -+ /* Process sections associated with the wrapper. */ - -- name = &namebuf[0]; -- if (strtab != NULL && name[0] == '_' && name[1] == '_') -+ if ((gnu_sections_found & SOMO_WRAPPING) != 0) +- gimple new_stmt; ++ gimple new_stmt = NULL; + if (j == 0) { -- unsigned long stringoffset; -+ if (i == nametab_index || i == index_index) -+ continue; - -- if (sscanf (name + 2, "%08lX", &stringoffset) == 1) -+ if (i == sections_index) - { -- if (stringoffset >= strtab_size) -+ unsigned int j; -+ for (j = 0; j < n_wrapped_sects; ++j) - { -- *errmsg = "section name offset out of range"; -- *err = 0; -- XDELETEVEC (strtab); -- XDELETEVEC (secdata); -- return 0; -+ unsigned int subsect_offset, subsect_length, name_offset; -+ subsect_offset = (*fetch_32) (index + 16 * j); -+ subsect_length = (*fetch_32) (index + 16 * j + 4); -+ name_offset = (*fetch_32) (index + 16 * j + 8); -+ /* We don't need the name_length yet. 
*/ -+ -+ secoffset = wrapper_sect_offset + subsect_offset; -+ secsize = subsect_length; -+ name = nametab + name_offset; +- gimple gtemp; +- vec_cond_lhs = ++ if (slp_node) ++ { ++ VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4); ++ VEC (slp_void_p, heap) *vec_defs; + -+ if (!(*pfn) (data, name, secoffset, secsize)) -+ { -+ *errmsg = NULL; -+ *err = 0; -+ XDELETEVEC (index); -+ XDELETEVEC (nametab); -+ XDELETEVEC (strtab); -+ XDELETEVEC (secdata); -+ return 0; -+ } - } -- -- name = strtab + stringoffset; -+ continue; - } - } - -+ if ((gnu_sections_found & SOMO_LONGN_PRESENT) != 0) -+ { -+ memcpy (namebuf, sechdr + sectname_offset, MACH_O_NAME_LEN); -+ namebuf[MACH_O_NAME_LEN] = '\0'; ++ vec_defs = VEC_alloc (slp_void_p, heap, 4); ++ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0)); ++ VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1)); ++ VEC_safe_push (tree, heap, ops, then_clause); ++ VEC_safe_push (tree, heap, ops, else_clause); ++ vect_get_slp_defs (ops, slp_node, &vec_defs, -1); ++ vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); ++ vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs); + -+ name = &namebuf[0]; -+ if (strtab != NULL && name[0] == '_' && name[1] == '_') -+ { -+ unsigned long stringoffset; ++ VEC_free (tree, heap, ops); ++ VEC_free (slp_void_p, heap, vec_defs); ++ } ++ else ++ { ++ gimple gtemp; ++ vec_cond_lhs = + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), + stmt, NULL); +- vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, ++ vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo, + NULL, >emp, &def, &dts[0]); +- vec_cond_rhs = +- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), +- stmt, NULL); +- vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, +- NULL, >emp, &def, &dts[1]); +- if (reduc_index == 1) 
+- vec_then_clause = reduc_def; +- else +- { +- vec_then_clause = vect_get_vec_def_for_operand (then_clause, +- stmt, NULL); +- vect_is_simple_use (then_clause, loop_vinfo, +- NULL, >emp, &def, &dts[2]); +- } +- if (reduc_index == 2) +- vec_else_clause = reduc_def; +- else +- { +- vec_else_clause = vect_get_vec_def_for_operand (else_clause, + -+ if (sscanf (name + 2, "%08lX", &stringoffset) == 1) ++ vec_cond_rhs = ++ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), ++ stmt, NULL); ++ vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo, ++ NULL, >emp, &def, &dts[1]); ++ if (reduc_index == 1) ++ vec_then_clause = reduc_def; ++ else ++ { ++ vec_then_clause = vect_get_vec_def_for_operand (then_clause, ++ stmt, NULL); ++ vect_is_simple_use (then_clause, loop_vinfo, ++ NULL, >emp, &def, &dts[2]); ++ } ++ if (reduc_index == 2) ++ vec_else_clause = reduc_def; ++ else + { -+ if (stringoffset >= strtab_size) -+ { -+ *errmsg = "section name offset out of range"; -+ *err = 0; -+ XDELETEVEC (index); -+ XDELETEVEC (nametab); -+ XDELETEVEC (strtab); -+ XDELETEVEC (secdata); -+ return 0; -+ } -+ -+ name = strtab + stringoffset; ++ vec_else_clause = vect_get_vec_def_for_operand (else_clause, + stmt, NULL); +- vect_is_simple_use (else_clause, loop_vinfo, ++ vect_is_simple_use (else_clause, loop_vinfo, + NULL, >emp, &def, &dts[3]); + } -+ } -+ } -+ else -+ { -+ /* Otherwise, make a name like __segment,__section as per the -+ convention in mach-o asm. 
*/ -+ name = &namebuf[0]; -+ memset (namebuf, 0, MACH_O_NAME_LEN * 2 + 2); -+ memcpy (namebuf, (char *) sechdr + segname_offset, MACH_O_NAME_LEN); -+ l = strlen (namebuf); -+ namebuf[l] = ','; -+ memcpy (namebuf + l + 1, (char *) sechdr + sectname_offset, -+ MACH_O_NAME_LEN); + } + } + else + { +- vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs); +- vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs); ++ vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], ++ VEC_pop (tree, vec_oprnds0)); ++ vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], ++ VEC_pop (tree, vec_oprnds1)); + vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2], +- vec_then_clause); ++ VEC_pop (tree, vec_oprnds2)); + vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3], +- vec_else_clause); ++ VEC_pop (tree, vec_oprnds3)); + } + - simple_object_mach_o_section_info (omr->is_big_endian, is_32, sechdr, - &secoffset, &secsize); - -@@ -505,12 +716,16 @@ - { - *errmsg = NULL; - *err = 0; -+ XDELETEVEC (index); -+ XDELETEVEC (nametab); - XDELETEVEC (strtab); - XDELETEVEC (secdata); - return 0; ++ if (!slp_node) ++ { ++ VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs); ++ VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs); ++ VEC_quick_push (tree, vec_oprnds2, vec_then_clause); ++ VEC_quick_push (tree, vec_oprnds3, vec_else_clause); } - } -+ XDELETEVEC (index); -+ XDELETEVEC (nametab); - XDELETEVEC (strtab); - XDELETEVEC (secdata); - -@@ -724,9 +939,9 @@ - simple_object_mach_o_write_section_header (simple_object_write *sobj, - int descriptor, - size_t sechdr_offset, -- const char *name, size_t secaddr, -- size_t secsize, size_t offset, -- unsigned int align, -+ const char *name, const char *segn, -+ size_t secaddr, size_t secsize, -+ size_t offset, unsigned int align, - const char **errmsg, int *err) - { - struct simple_object_mach_o_attributes *attrs = -@@ -748,7 +963,7 @@ - strncpy ((char *) hdr + offsetof (struct mach_o_section_32, sectname), - name, 
MACH_O_NAME_LEN); - strncpy ((char *) hdr + offsetof (struct mach_o_section_32, segname), -- sobj->segment_name, MACH_O_NAME_LEN); -+ segn, MACH_O_NAME_LEN); - set_32 (hdr + offsetof (struct mach_o_section_32, addr), secaddr); - set_32 (hdr + offsetof (struct mach_o_section_32, size), secsize); - set_32 (hdr + offsetof (struct mach_o_section_32, offset), offset); -@@ -773,7 +988,7 @@ - strncpy ((char *) hdr + offsetof (struct mach_o_section_64, sectname), - name, MACH_O_NAME_LEN); - strncpy ((char *) hdr + offsetof (struct mach_o_section_64, segname), -- sobj->segment_name, MACH_O_NAME_LEN); -+ segn, MACH_O_NAME_LEN); - set_64 (hdr + offsetof (struct mach_o_section_64, addr), secaddr); - set_64 (hdr + offsetof (struct mach_o_section_64, size), secsize); - set_32 (hdr + offsetof (struct mach_o_section_64, offset), offset); -@@ -793,11 +1008,25 @@ - sechdrsize, errmsg, err); - } - --/* Write out the single segment and the sections of a Mach-O file. */ -+/* Write out the single (anonymous) segment containing the sections of a Mach-O -+ Object file. -+ -+ As a GNU extension to mach-o, when the caller specifies a segment name in -+ sobj->segment_name, all the sections passed will be output under a single -+ mach-o section header. The caller's sections are indexed within this -+ 'wrapper' section by a table stored in a second mach-o section. Finally, -+ arbitrary length section names are permitted by the extension and these are -+ stored in a table in a third mach-o section. -+ -+ Note that this is only likely to make any sense for the __GNU_LTO segment -+ at present. + /* Arguments are ready. Create the new vector stmt. 
*/ +- vec_compare = build2 (TREE_CODE (cond_expr), vectype, +- vec_cond_lhs, vec_cond_rhs); +- vec_cond_expr = build3 (VEC_COND_EXPR, vectype, +- vec_compare, vec_then_clause, vec_else_clause); ++ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs) ++ { ++ vec_cond_rhs = VEC_index (tree, vec_oprnds1, i); ++ vec_then_clause = VEC_index (tree, vec_oprnds2, i); ++ vec_else_clause = VEC_index (tree, vec_oprnds3, i); + -+ If the wrapper extension is not in force, we assume that the section name -+ is in the form __SEGMENT_NAME,__section_name as per Mach-O asm. */ - - static int - simple_object_mach_o_write_segment (simple_object_write *sobj, int descriptor, -- size_t nsects, const char **errmsg, -+ size_t *nsects, const char **errmsg, - int *err) - { - struct simple_object_mach_o_attributes *attrs = -@@ -814,6 +1043,10 @@ - simple_object_write_section *section; - unsigned char hdrbuf[sizeof (struct mach_o_segment_command_64)]; - unsigned char *hdr; -+ size_t nsects_in; -+ unsigned int *index; -+ char *snames; -+ unsigned int sect; ++ vec_compare = build2 (TREE_CODE (cond_expr), vectype, ++ vec_cond_lhs, vec_cond_rhs); ++ vec_cond_expr = build3 (VEC_COND_EXPR, vectype, ++ vec_compare, vec_then_clause, vec_else_clause); - set_32 = (attrs->is_big_endian - ? 
simple_object_set_big_32 -@@ -834,19 +1067,62 @@ - sechdrsize = sizeof (struct mach_o_section_64); - } +- new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- vect_finish_stmt_generation (stmt, new_stmt, gsi); +- if (j == 0) +- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; +- else +- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; ++ new_stmt = gimple_build_assign (vec_dest, vec_cond_expr); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ vect_finish_stmt_generation (stmt, new_stmt, gsi); ++ if (slp_node) ++ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt); ++ } -+ name_offset = 0; -+ *nsects = nsects_in = 0; -+ -+ /* Count the number of sections we start with. */ -+ -+ for (section = sobj->sections; section != NULL; section = section->next) -+ nsects_in++; -+ -+ if (sobj->segment_name != NULL) -+ { -+ /* We will only write 3 sections: wrapped data, index and names. */ -+ -+ *nsects = 3; -+ -+ /* The index has four entries per wrapped section: -+ Section Offset, length, Name offset, length. -+ Where the offsets are based at the start of the wrapper and name -+ sections respectively. -+ The values are stored as 32 bit int for both 32 and 64 bit mach-o -+ since the size of a mach-o MH_OBJECT cannot exceed 4G owing to -+ other constraints. */ -+ -+ index = XNEWVEC (unsigned int, nsects_in * 4); +- prev_stmt_info = vinfo_for_stmt (new_stmt); ++ if (slp_node) ++ continue; + -+ /* We now need to figure out the size of the names section. This just -+ stores the names as null-terminated c strings, packed without any -+ alignment padding. 
*/ ++ if (j == 0) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; ++ else ++ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + -+ for (section = sobj->sections, sect = 0; section != NULL; -+ section = section->next, sect++) -+ { -+ index[sect*4+2] = name_offset; -+ index[sect*4+3] = strlen (section->name) + 1; -+ name_offset += strlen (section->name) + 1; -+ } -+ snames = XNEWVEC (char, name_offset); -+ } -+ else -+ { -+ *nsects = nsects_in; -+ index = NULL; -+ snames = NULL; -+ } -+ - sechdr_offset = hdrsize + seghdrsize; -- cmdsize = seghdrsize + nsects * sechdrsize; -+ cmdsize = seghdrsize + *nsects * sechdrsize; - offset = hdrsize + cmdsize; -- name_offset = 0; - secaddr = 0; - -- for (section = sobj->sections; section != NULL; section = section->next) -+ for (section = sobj->sections, sect = 0; -+ section != NULL; section = section->next, sect++) - { - size_t mask; - size_t new_offset; - size_t secsize; - struct simple_object_write_section_buffer *buffer; -- char namebuf[MACH_O_NAME_LEN + 1]; - - mask = (1U << section->align) - 1; - new_offset = offset + mask; -@@ -877,39 +1153,126 @@ - secsize += buffer->size; - } ++ prev_stmt_info = vinfo_for_stmt (new_stmt); + } -- snprintf (namebuf, sizeof namebuf, "__%08X", name_offset); -+ if (sobj->segment_name != NULL) -+ { -+ index[sect*4+0] = (unsigned int) offset; -+ index[sect*4+1] = secsize; -+ /* Stash the section name in our table. */ -+ memcpy (snames + index[sect * 4 + 2], section->name, -+ index[sect * 4 + 3]); -+ } -+ else -+ { -+ char namebuf[MACH_O_NAME_LEN + 1]; -+ char segnbuf[MACH_O_NAME_LEN + 1]; -+ char *comma; -+ -+ /* Try to extract segment,section from the input name. */ -+ -+ memset (namebuf, 0, sizeof namebuf); -+ memset (segnbuf, 0, sizeof segnbuf); -+ comma = strchr (section->name, ','); -+ if (comma != NULL) -+ { -+ int len = comma - section->name; -+ len = len > MACH_O_NAME_LEN ? 
MACH_O_NAME_LEN : len; -+ strncpy (namebuf, section->name, len); -+ strncpy (segnbuf, comma + 1, MACH_O_NAME_LEN); -+ } -+ else /* just try to copy the name, leave segment blank. */ -+ strncpy (namebuf, section->name, MACH_O_NAME_LEN); -+ -+ if (!simple_object_mach_o_write_section_header (sobj, descriptor, -+ sechdr_offset, -+ namebuf, segnbuf, -+ secaddr, secsize, -+ offset, -+ section->align, -+ errmsg, err)) -+ return 0; -+ sechdr_offset += sechdrsize; -+ } -+ -+ offset += secsize; -+ secaddr += secsize; -+ } ++ VEC_free (tree, heap, vec_oprnds0); ++ VEC_free (tree, heap, vec_oprnds1); ++ VEC_free (tree, heap, vec_oprnds2); ++ VEC_free (tree, heap, vec_oprnds3); + -+ if (sobj->segment_name != NULL) -+ { -+ size_t secsize; -+ unsigned int i; -+ -+ /* Write the section header for the wrapper. */ -+ /* Account for any initial aligment - which becomes the alignment for this -+ created section. */ -+ -+ secsize = (offset - index[0]); - if (!simple_object_mach_o_write_section_header (sobj, descriptor, -- sechdr_offset, namebuf, -- secaddr, secsize, offset, -- section->align, -+ sechdr_offset, -+ GNU_WRAPPER_SECTS, -+ sobj->segment_name, -+ 0 /*secaddr*/, -+ secsize, index[0], -+ sobj->sections->align, - errmsg, err)) - return 0; - -+ /* Subtract the wrapper section start from the begining of each sub -+ section. */ -+ -+ for (i = 1; i < nsects_in; ++i) -+ index[4 * i] -= index[0]; -+ index[0] = 0; -+ - sechdr_offset += sechdrsize; -- offset += secsize; -- name_offset += strlen (section->name) + 1; -- secaddr += secsize; -- } + return true; + } -- /* Write out the section names. */ -+ /* Write out the section names. -+ ... the header ... -+ name_offset contains the length of the section. It is not aligned. 
*/ - -- if (!simple_object_mach_o_write_section_header (sobj, descriptor, -- sechdr_offset, -- GNU_SECTION_NAMES, secaddr, -- name_offset, offset, 0, -- errmsg, err)) -- return 0; -+ if (!simple_object_mach_o_write_section_header (sobj, descriptor, -+ sechdr_offset, -+ GNU_WRAPPER_NAMES, -+ sobj->segment_name, -+ 0 /*secaddr*/, -+ name_offset, -+ offset, -+ 0, errmsg, err)) -+ return 0; +@@ -4677,6 +5187,7 @@ + enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); + bool ok; + tree scalar_type, vectype; ++ gimple pattern_stmt, pattern_def_stmt; -- for (section = sobj->sections; section != NULL; section = section->next) -- { -- size_t namelen; -+ /* ... and the content.. */ -+ if (!simple_object_internal_write (descriptor, offset, -+ (const unsigned char *) snames, -+ name_offset, errmsg, err)) -+ return 0; + if (vect_print_dump_info (REPORT_DETAILS)) + { +@@ -4698,16 +5209,70 @@ + - any LABEL_EXPRs in the loop + - computations that are used only for array indexing or loop control. + In basic blocks we only analyze statements that are a part of some SLP +- instance, therefore, all the statements are relevant. */ ++ instance, therefore, all the statements are relevant. + -+ sechdr_offset += sechdrsize; -+ secaddr += name_offset; -+ offset += name_offset; -+ -+ /* Now do the index, we'll align this to 4 bytes although the read code -+ will handle unaligned. */ -+ -+ offset += 3; -+ offset &= ~0x03; -+ if (!simple_object_mach_o_write_section_header (sobj, descriptor, -+ sechdr_offset, -+ GNU_WRAPPER_INDEX, -+ sobj->segment_name, -+ 0 /*secaddr*/, -+ nsects_in * 16, -+ offset, -+ 2, errmsg, err)) -+ return 0; ++ Pattern statement needs to be analyzed instead of the original statement ++ if the original statement is not relevant. Otherwise, we analyze both ++ statements. */ -- namelen = strlen (section->name) + 1; -+ /* ... and the content.. 
*/ - if (!simple_object_internal_write (descriptor, offset, -- (const unsigned char *) section->name, -- namelen, errmsg, err)) -+ (const unsigned char *) index, -+ nsects_in*16, errmsg, err)) - return 0; -- offset += namelen; -+ -+ XDELETEVEC (index); -+ XDELETEVEC (snames); - } - - /* Write out the segment header. */ -@@ -923,9 +1286,8 @@ - MACH_O_LC_SEGMENT); - set_32 (hdr + offsetof (struct mach_o_segment_command_32, cmdsize), - cmdsize); -- strncpy (((char *) hdr -- + offsetof (struct mach_o_segment_command_32, segname)), -- sobj->segment_name, MACH_O_NAME_LEN); -+ /* MH_OBJECTS have a single, anonymous, segment - so the segment name -+ is left empty. */ - /* vmaddr left as zero. */ - /* vmsize left as zero. */ - set_32 (hdr + offsetof (struct mach_o_segment_command_32, fileoff), -@@ -935,7 +1297,7 @@ - /* maxprot left as zero. */ - /* initprot left as zero. */ - set_32 (hdr + offsetof (struct mach_o_segment_command_32, nsects), -- nsects); -+ *nsects); - /* flags left as zero. */ - } - else -@@ -951,9 +1313,8 @@ - MACH_O_LC_SEGMENT); - set_32 (hdr + offsetof (struct mach_o_segment_command_64, cmdsize), - cmdsize); -- strncpy (((char *) hdr -- + offsetof (struct mach_o_segment_command_64, segname)), -- sobj->segment_name, MACH_O_NAME_LEN); -+ /* MH_OBJECTS have a single, anonymous, segment - so the segment name -+ is left empty. */ - /* vmaddr left as zero. */ - /* vmsize left as zero. */ - set_64 (hdr + offsetof (struct mach_o_segment_command_64, fileoff), -@@ -963,7 +1324,7 @@ - /* maxprot left as zero. */ - /* initprot left as zero. */ - set_32 (hdr + offsetof (struct mach_o_segment_command_64, nsects), -- nsects); -+ *nsects); - /* flags left as zero. 
*/ - #endif ++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + { +- if (vect_print_dump_info (REPORT_DETAILS)) +- fprintf (vect_dump, "irrelevant."); ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && pattern_stmt ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ /* Analyze PATTERN_STMT instead of the original stmt. */ ++ stmt = pattern_stmt; ++ stmt_info = vinfo_for_stmt (pattern_stmt); ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } ++ } ++ else ++ { ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ fprintf (vect_dump, "irrelevant."); + +- return true; ++ return true; ++ } } -@@ -978,23 +1339,17 @@ - simple_object_mach_o_write_to_file (simple_object_write *sobj, int descriptor, - int *err) - { -- size_t nsects; -- simple_object_write_section *section; -+ size_t nsects = 0; - const char *errmsg; - -- /* Start at 1 for symbol_names section. */ -- nsects = 1; -- for (section = sobj->sections; section != NULL; section = section->next) -- ++nsects; -+ if (!simple_object_mach_o_write_segment (sobj, descriptor, &nsects, -+ &errmsg, err)) -+ return errmsg; - - if (!simple_object_mach_o_write_header (sobj, descriptor, nsects, - &errmsg, err)) - return errmsg; - -- if (!simple_object_mach_o_write_segment (sobj, descriptor, nsects, -- &errmsg, err)) -- return errmsg; -- - return NULL; - } - ---- a/src/libjava/ChangeLog -+++ b/src/libjava/ChangeLog -@@ -1,3 +1,16 @@ -+2011-11-24 Jakub Jelinek -+ -+ PR bootstrap/50888 -+ * prims.cc: Don't include ctype.h. -+ (c_isspace): Define. -+ (next_property_key, next_property_value): Use it instead -+ of isspace. 
-+ -+2011-11-20 Andreas Tobler ++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && pattern_stmt ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) ++ { ++ /* Analyze PATTERN_STMT too. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern statement: "); ++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); ++ } + -+ * configure.ac: Fix FreeBSD 10 detection. -+ * configure: Regenerate. ++ if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node)) ++ return false; ++ } + - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/libjava/classpath/ChangeLog.gcj -+++ b/src/libjava/classpath/ChangeLog.gcj -@@ -1,3 +1,9 @@ -+2011-11-29 Andreas Tobler -+ -+ * config.rpath (ld_shlibs): Fix detection of FreeBSD-10 and up. -+ (libname_spec): Likewise. -+ * configure: Regenerate with autoconf -I ../../. -+ - 2011-02-13 Ralf Wildenhues - - * config.rpath, ltcf-c.sh, ltcf-gcj.sh, ltconfig: Remove ---- a/src/libjava/classpath/config.rpath -+++ b/src/libjava/classpath/config.rpath -@@ -361,7 +361,7 @@ - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - ;; -- freebsd2*) -+ freebsd2.*) - hardcode_direct=yes - hardcode_minus_L=yes - ;; -@@ -533,7 +533,7 @@ - ;; - freebsd* | dragonfly*) - case "$host_os" in -- freebsd[123]*) -+ freebsd[23].*) - library_names_spec='$libname$shrext$versuffix' ;; - *) - library_names_spec='$libname$shrext' ;; ---- a/src/libjava/classpath/configure -+++ b/src/libjava/classpath/configure -@@ -10025,7 +10025,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -10941,7 +10941,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -10959,7 +10959,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) -@@ -14225,7 +14225,7 @@ - esac - ;; - -- freebsd[12]*) -+ freebsd2.*) - # C++ shared libraries reported to be fairly broken before - # switch to ELF - ld_shlibs_CXX=no -@@ -16000,7 +16000,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -16018,7 +16018,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libjava/configure -+++ b/src/libjava/configure -@@ -11560,7 +11560,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -12476,7 +12476,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -12494,7 +12494,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) -@@ -14384,7 +14384,7 @@ - esac - ;; - -- freebsd[12]*) -+ freebsd2.*) - # C++ shared libraries reported to be fairly broken before - # switch to ELF - ld_shlibs_CXX=no -@@ -16159,7 +16159,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -16177,7 +16177,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) -@@ -18520,7 +18520,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - archive_cmds_GCJ='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct_GCJ=yes - hardcode_minus_L_GCJ=yes -@@ -20715,7 +20715,7 @@ - *-*-cygwin*) - # Don't set THREADLIBS here. Cygwin doesn't have -lpthread. - ;; -- *-*-freebsd[1234]*) -+ *-*-freebsd[34].*) - # Before FreeBSD 5, it didn't have -lpthread (or any library which - # merely adds pthread_* functions) but it does have a -pthread switch - # which is required at link-time to select -lc_r *instead* of -lc. ---- a/src/libjava/configure.ac -+++ b/src/libjava/configure.ac -@@ -1060,7 +1060,7 @@ - *-*-cygwin*) - # Don't set THREADLIBS here. Cygwin doesn't have -lpthread. 
- ;; -- *-*-freebsd[[1234]]*) -+ *-*-freebsd[[34]].*) - # Before FreeBSD 5, it didn't have -lpthread (or any library which - # merely adds pthread_* functions) but it does have a -pthread switch - # which is required at link-time to select -lc_r *instead* of -lc. ---- a/src/libjava/libltdl/ChangeLog -+++ b/src/libjava/libltdl/ChangeLog -@@ -1,3 +1,8 @@ -+2011-11-20 Andreas Tobler ++ if (is_pattern_stmt_p (stmt_info) ++ && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info)) ++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt)) ++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))) ++ { ++ /* Analyze def stmt of STMT if it's a pattern stmt. */ ++ if (vect_print_dump_info (REPORT_DETAILS)) ++ { ++ fprintf (vect_dump, "==> examining pattern def statement: "); ++ print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM); ++ } + -+ * acinclude.m4: Additional FreeBSD 10 fixes. -+ * configure: Regenerate. ++ if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node)) ++ return false; ++ } + - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/libjava/libltdl/acinclude.m4 -+++ b/src/libjava/libltdl/acinclude.m4 -@@ -1377,7 +1377,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[[01]]* | freebsdelf3.[[01]]*) -@@ -3035,7 +3035,7 @@ - ;; - esac - ;; -- freebsd[[12]]*) -+ freebsd2.*) - # C++ shared libraries reported to be fairly broken before switch to ELF - _LT_AC_TAGVAR(ld_shlibs, $1)=no - ;; -@@ -5669,7 +5669,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - _LT_AC_TAGVAR(hardcode_direct, $1)=yes - _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes ---- a/src/libjava/libltdl/configure -+++ b/src/libjava/libltdl/configure -@@ -7355,7 +7355,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -7968,7 +7968,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libjava/prims.cc -+++ b/src/libjava/prims.cc -@@ -38,7 +38,6 @@ - #endif - - #ifndef DISABLE_GETENV_PROPERTIES --#include - #include - #define PROCESS_GCJ_PROPERTIES process_gcj_properties() - #else -@@ -985,6 +984,8 @@ - #ifndef DISABLE_GETENV_PROPERTIES + switch (STMT_VINFO_DEF_TYPE (stmt_info)) + { +@@ -4781,15 +5346,18 @@ + || vectorizable_call (stmt, NULL, NULL) + || vectorizable_store (stmt, NULL, NULL, NULL) + || vectorizable_reduction (stmt, NULL, NULL, NULL) +- || vectorizable_condition (stmt, NULL, NULL, NULL, 0)); ++ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL)); + else + { + if (bb_vinfo) +- ok = (vectorizable_shift (stmt, NULL, NULL, node) ++ ok = (vectorizable_type_promotion (stmt, NULL, NULL, node) ++ || vectorizable_type_demotion (stmt, NULL, NULL, node) ++ || vectorizable_shift (stmt, NULL, NULL, node) + || vectorizable_operation (stmt, NULL, NULL, node) + || vectorizable_assignment (stmt, NULL, NULL, node) + || vectorizable_load (stmt, NULL, NULL, node, NULL) +- || vectorizable_store (stmt, NULL, NULL, node)); ++ || vectorizable_store (stmt, NULL, NULL, node) ++ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)); + } -+#define c_isspace(c) (memchr (" \t\n\r\v\f", c, 6) != NULL) -+ - static char * - 
next_property_key (char *s, size_t *length) - { -@@ -993,7 +994,7 @@ - JvAssert (s); + if (!ok) +@@ -4825,27 +5393,6 @@ + return false; + } - // Skip over whitespace -- while (isspace (*s)) -+ while (c_isspace (*s)) - s++; +- if (!PURE_SLP_STMT (stmt_info)) +- { +- /* Groups of strided accesses whose size is not a power of 2 are not +- vectorizable yet using loop-vectorization. Therefore, if this stmt +- feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and +- loop-based vectorized), the loop cannot be vectorized. */ +- if (STMT_VINFO_STRIDED_ACCESS (stmt_info) +- && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( +- DR_GROUP_FIRST_DR (stmt_info)))) == -1) +- { +- if (vect_print_dump_info (REPORT_DETAILS)) +- { +- fprintf (vect_dump, "not vectorized: the size of group " +- "of strided accesses is not a power of 2"); +- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); +- } +- +- return false; +- } +- } +- + return true; + } - // If we've reached the end, return NULL. Also return NULL if for -@@ -1005,7 +1006,7 @@ +@@ -4862,7 +5409,6 @@ + bool is_store = false; + gimple vec_stmt = NULL; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +- gimple orig_stmt_in_pattern; + bool done; - // Determine the length of the property key. - while (s[l] != 0 -- && ! isspace (s[l]) -+ && ! 
c_isspace (s[l]) - && s[l] != ':' - && s[l] != '=') - { -@@ -1027,19 +1028,19 @@ + switch (STMT_VINFO_TYPE (stmt_info)) +@@ -4927,8 +5473,7 @@ + break; - JvAssert (s); + case condition_vec_info_type: +- gcc_assert (!slp_node); +- done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0); ++ done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node); + gcc_assert (done); + break; -- while (isspace (*s)) -+ while (c_isspace (*s)) - s++; +@@ -5001,21 +5546,7 @@ + } - if (*s == ':' - || *s == '=') - s++; + if (vec_stmt) +- { +- STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; +- orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); +- if (orig_stmt_in_pattern) +- { +- stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); +- /* STMT was inserted by the vectorizer to replace a computation idiom. +- ORIG_STMT_IN_PATTERN is a stmt in the original sequence that +- computed this idiom. We need to record a pointer to VEC_STMT in +- the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the +- documentation of vect_pattern_recog. */ +- if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) +- STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; +- } +- } ++ STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; -- while (isspace (*s)) -+ while (c_isspace (*s)) - s++; + return is_store; + } +@@ -5065,6 +5596,7 @@ + STMT_VINFO_VECTORIZABLE (res) = true; + STMT_VINFO_IN_PATTERN_P (res) = false; + STMT_VINFO_RELATED_STMT (res) = NULL; ++ STMT_VINFO_PATTERN_DEF_STMT (res) = NULL; + STMT_VINFO_DATA_REF (res) = NULL; - // Determine the length of the property value. - while (s[l] != 0 -- && ! isspace (s[l]) -+ && ! c_isspace (s[l]) - && s[l] != ':' - && s[l] != '=') + STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; +@@ -5402,8 +5934,12 @@ + || *dt == vect_nested_cycle) { ---- a/src/libmudflap/ChangeLog -+++ b/src/libmudflap/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/libmudflap/configure -+++ b/src/libmudflap/configure -@@ -8818,7 +8818,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -9731,7 +9731,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -9749,7 +9749,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libobjc/ChangeLog -+++ b/src/libobjc/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/libobjc/configure -+++ b/src/libobjc/configure -@@ -8797,7 +8797,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -9713,7 +9713,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -9731,7 +9731,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libquadmath/ChangeLog -+++ b/src/libquadmath/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/libquadmath/configure -+++ b/src/libquadmath/configure -@@ -8727,7 +8727,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -9643,7 +9643,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -9661,7 +9661,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libssp/ChangeLog -+++ b/src/libssp/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler + stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt); +- if (STMT_VINFO_IN_PATTERN_P (stmt_info)) + -+ * configure: Regenerate. ++ if (STMT_VINFO_IN_PATTERN_P (stmt_info) ++ && !STMT_VINFO_RELEVANT (stmt_info) ++ && !STMT_VINFO_LIVE_P (stmt_info)) + stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); + - 2011-10-26 Release Manager + *vectype = STMT_VINFO_VECTYPE (stmt_info); + gcc_assert (*vectype != NULL_TREE); + } +@@ -5452,7 +5988,7 @@ + { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); +- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); ++ struct loop *vect_loop = NULL; + bool ordered_p; + enum machine_mode vec_mode; + enum insn_code icode1, icode2; +@@ -5461,6 +5997,9 @@ + tree wide_vectype = vectype_out; + enum tree_code c1, c2; - * GCC 4.6.2 released. ---- a/src/libssp/configure -+++ b/src/libssp/configure -@@ -8864,7 +8864,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -9780,7 +9780,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -9798,7 +9798,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libstdc++-v3/ChangeLog -+++ b/src/libstdc++-v3/ChangeLog -@@ -1,3 +1,55 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ -+2011-11-16 Paolo Carlini -+ -+ PR libstdc++/51142 -+ * include/debug/unordered_map (unordered_map<>::erase(iterator), -+ unordered_multimap<>::erase(iterator)): Add, consistently with -+ LWG 2059. -+ * include/debug/unordered_set (unordered_set<>::erase(iterator), -+ unordered_multiset<>::erase(iterator)): Likewise. -+ * include/debug/map.h (map<>::erase(iterator)): Likewise. -+ * include/debug/multimap.h (multimap<>::erase(iterator)): Likewise. -+ * include/profile/map.h (map<>::erase(iterator)): Likewise. -+ * include/profile/multimap.h (multimap<>::erase(iterator)): Likewise. -+ * include/bits/hashtable.h (_Hashtable<>::erase(iterator)): Likewise. -+ * include/bits/stl_map.h (map<>::erase(iterator)): Likewise. -+ * include/bits/stl_multimap.h (multimap<>::erase(iterator)): Likewise. -+ * include/bits/stl_tree.h (_Rb_tree<>::erase(iterator)): Likewise. -+ * testsuite/23_containers/unordered_map/erase/51142.cc: New. -+ * testsuite/23_containers/multimap/modifiers/erase/51142.cc: Likewise. -+ * testsuite/23_containers/set/modifiers/erase/51142.cc: Likewise. -+ * testsuite/23_containers/unordered_multimap/erase/51142.cc: Likewise. -+ * testsuite/23_containers/unordered_set/erase/51142.cc: Likewise. -+ * testsuite/23_containers/multiset/modifiers/erase/51142.cc: Likewise. 
-+ * testsuite/23_containers/unordered_multiset/erase/51142.cc: Likewise. -+ * testsuite/23_containers/map/modifiers/erase/51142.cc: Likewise. -+ -+2011-11-15 Jason Dick -+ -+ PR libstdc++/51133 -+ * include/tr1/poly_hermite.tcc (__poly_hermite_recursion): Fix -+ wrong sign in recursion relation. -+ -+2011-11-02 Richard B. Kreckel -+ Paolo Carlini -+ -+ PR libstdc++/50880 -+ * include/std/complex (__complex_acosh): Fix in a better way, -+ use Kahan's formula. -+ * include/tr1/complex (__complex_acosh): Likewise. -+ -+2011-11-02 Richard B. Kreckel -+ Paolo Carlini -+ -+ PR libstdc++/50880 -+ * include/std/complex (__complex_acosh): Fix for __z.real() < 0. -+ * include/tr1/complex (__complex_acosh): Likewise. -+ * testsuite/26_numerics/complex/50880.cc: New. -+ * testsuite/tr1/8_c_compatibility/complex/50880.cc: Likewise. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/libstdc++-v3/configure -+++ b/src/libstdc++-v3/configure -@@ -9698,7 +9698,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -10614,7 +10614,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -10632,7 +10632,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) -@@ -12522,7 +12522,7 @@ - esac - ;; - -- freebsd[12]*) -+ freebsd2.*) - # C++ shared libraries reported to be fairly broken before - # switch to ELF - ld_shlibs_CXX=no -@@ -14297,7 +14297,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -14315,7 +14315,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/libstdc++-v3/include/bits/hashtable.h -+++ b/src/libstdc++-v3/include/bits/hashtable.h -@@ -440,6 +440,11 @@ - iterator - erase(const_iterator); - -+ // LWG 2059. -+ iterator -+ erase(iterator __it) -+ { return erase(const_iterator(__it)); } -+ - size_type - erase(const key_type&); - ---- a/src/libstdc++-v3/include/bits/stl_map.h -+++ b/src/libstdc++-v3/include/bits/stl_map.h -@@ -612,6 +612,11 @@ - iterator - erase(const_iterator __position) - { return _M_t.erase(__position); } -+ -+ // LWG 2059. -+ iterator -+ erase(iterator __position) -+ { return _M_t.erase(__position); } - #else - /** - * @brief Erases an element from a %map. ---- a/src/libstdc++-v3/include/bits/stl_multimap.h -+++ b/src/libstdc++-v3/include/bits/stl_multimap.h -@@ -533,6 +533,11 @@ - iterator - erase(const_iterator __position) - { return _M_t.erase(__position); } -+ -+ // LWG 2059. 
-+ iterator -+ erase(iterator __position) -+ { return _M_t.erase(__position); } - #else - /** - * @brief Erases an element from a %multimap. ---- a/src/libstdc++-v3/include/bits/stl_tree.h -+++ b/src/libstdc++-v3/include/bits/stl_tree.h -@@ -760,6 +760,16 @@ - _M_erase_aux(__position); - return __result._M_const_cast(); - } -+ -+ // LWG 2059. -+ iterator -+ erase(iterator __position) -+ { -+ iterator __result = __position; -+ ++__result; -+ _M_erase_aux(__position); -+ return __result; -+ } - #else - void - erase(iterator __position) ---- a/src/libstdc++-v3/include/debug/map.h -+++ b/src/libstdc++-v3/include/debug/map.h -@@ -273,6 +273,10 @@ - this->_M_invalidate_if(_Equal(__position.base())); - return iterator(_Base::erase(__position.base()), this); - } -+ -+ iterator -+ erase(iterator __position) -+ { return erase(const_iterator(__position)); } - #else - void - erase(iterator __position) ---- a/src/libstdc++-v3/include/debug/multimap.h -+++ b/src/libstdc++-v3/include/debug/multimap.h -@@ -254,6 +254,10 @@ - this->_M_invalidate_if(_Equal(__position.base())); - return iterator(_Base::erase(__position.base()), this); - } ++ if (loop_info) ++ vect_loop = LOOP_VINFO_LOOP (loop_info); + -+ iterator -+ erase(iterator __position) -+ { return erase(const_iterator(__position)); } - #else - void - erase(iterator __position) ---- a/src/libstdc++-v3/include/debug/unordered_map -+++ b/src/libstdc++-v3/include/debug/unordered_map -@@ -276,6 +276,10 @@ - } + /* The result of a vectorized widening operation usually requires two vectors + (because the widened results do not fit int one vector). The generated + vector results would normally be expected to be generated in the same +@@ -5481,7 +6020,8 @@ + iterations in parallel). We therefore don't allow to change the order + of the computation in the inner-loop during outer-loop vectorization. 
*/ - iterator -+ erase(iterator __it) -+ { return erase(const_iterator(__it)); } -+ -+ iterator - erase(const_iterator __first, const_iterator __last) - { - __glibcxx_check_erase_range(__first, __last); -@@ -558,6 +562,10 @@ - } +- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction ++ if (vect_loop ++ && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction + && !nested_in_vect_loop_p (vect_loop, stmt)) + ordered_p = false; + else +@@ -5518,6 +6058,19 @@ + } + break; - iterator -+ erase(iterator __it) -+ { return erase(const_iterator(__it)); } ++ case WIDEN_LSHIFT_EXPR: ++ if (BYTES_BIG_ENDIAN) ++ { ++ c1 = VEC_WIDEN_LSHIFT_HI_EXPR; ++ c2 = VEC_WIDEN_LSHIFT_LO_EXPR; ++ } ++ else ++ { ++ c2 = VEC_WIDEN_LSHIFT_HI_EXPR; ++ c1 = VEC_WIDEN_LSHIFT_LO_EXPR; ++ } ++ break; + -+ iterator - erase(const_iterator __first, const_iterator __last) - { - __glibcxx_check_erase_range(__first, __last); ---- a/src/libstdc++-v3/include/debug/unordered_set -+++ b/src/libstdc++-v3/include/debug/unordered_set -@@ -269,6 +269,10 @@ - } + CASE_CONVERT: + if (BYTES_BIG_ENDIAN) + { +--- a/src/gcc/value-prof.c ++++ b/src/gcc/value-prof.c +@@ -1252,6 +1252,9 @@ + if (TREE_CODE (callee) == FUNCTION_DECL) + return false; - iterator -+ erase(iterator __it) -+ { return erase(const_iterator(__it)); } ++ if (gimple_call_internal_p (stmt)) ++ return false; + -+ iterator - erase(const_iterator __first, const_iterator __last) - { - __glibcxx_check_erase_range(__first, __last); -@@ -539,6 +543,10 @@ - } + histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_INDIR_CALL); + if (!histogram) + return false; +@@ -1640,6 +1643,7 @@ + tree callee; - iterator -+ erase(iterator __it) -+ { return erase(const_iterator(__it)); } -+ -+ iterator - erase(const_iterator __first, const_iterator __last) - { - __glibcxx_check_erase_range(__first, __last); ---- a/src/libstdc++-v3/include/profile/map.h -+++ b/src/libstdc++-v3/include/profile/map.h -@@ -326,6 +326,10 @@ - 
__profcxx_map_to_unordered_map_erase(this, size(), 1); - return __i; - } -+ -+ iterator -+ erase(iterator __position) -+ { return erase(const_iterator(__position)); } - #else - void - erase(iterator __position) ---- a/src/libstdc++-v3/include/profile/multimap.h -+++ b/src/libstdc++-v3/include/profile/multimap.h -@@ -225,6 +225,10 @@ - iterator - erase(const_iterator __position) - { return iterator(_Base::erase(__position)); } -+ -+ iterator -+ erase(iterator __position) -+ { return iterator(_Base::erase(__position)); } - #else - void - erase(iterator __position) ---- a/src/libstdc++-v3/include/std/complex -+++ b/src/libstdc++-v3/include/std/complex -@@ -1,7 +1,7 @@ - // The template and inlines for the -*- C++ -*- complex number classes. - - // Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, --// 2006, 2007, 2008, 2009, 2010 -+// 2006, 2007, 2008, 2009, 2010, 2011 - // Free Software Foundation, Inc. - // - // This file is part of the GNU ISO C++ Library. This library is free -@@ -1695,12 +1695,9 @@ - std::complex<_Tp> - __complex_acosh(const std::complex<_Tp>& __z) - { -- std::complex<_Tp> __t((__z.real() - __z.imag()) -- * (__z.real() + __z.imag()) - _Tp(1.0), -- _Tp(2.0) * __z.real() * __z.imag()); -- __t = std::sqrt(__t); -- -- return std::log(__t + __z); -+ // Kahan's formula. -+ return _Tp(2.0) * std::log(std::sqrt(_Tp(0.5) * (__z + _Tp(1.0))) -+ + std::sqrt(_Tp(0.5) * (__z - _Tp(1.0)))); - } - - #if _GLIBCXX_USE_C99_COMPLEX_TR1 ---- a/src/libstdc++-v3/include/tr1/complex -+++ b/src/libstdc++-v3/include/tr1/complex -@@ -185,12 +185,9 @@ - std::complex<_Tp> - __complex_acosh(const std::complex<_Tp>& __z) - { -- std::complex<_Tp> __t((__z.real() - __z.imag()) -- * (__z.real() + __z.imag()) - _Tp(1.0), -- _Tp(2.0) * __z.real() * __z.imag()); -- __t = std::sqrt(__t); -- -- return std::log(__t + __z); -+ // Kahan's formula. 
-+ return _Tp(2.0) * std::log(std::sqrt(_Tp(0.5) * (__z + _Tp(1.0))) -+ + std::sqrt(_Tp(0.5) * (__z - _Tp(1.0)))); - } - - #if _GLIBCXX_USE_C99_COMPLEX_TR1 ---- a/src/libstdc++-v3/include/tr1/poly_hermite.tcc -+++ b/src/libstdc++-v3/include/tr1/poly_hermite.tcc -@@ -1,6 +1,6 @@ - // Special functions -*- C++ -*- + if (gimple_code (stmt) != GIMPLE_CALL ++ || gimple_call_internal_p (stmt) + || gimple_call_fndecl (stmt) != NULL_TREE) + return; --// Copyright (C) 2006, 2007, 2008, 2009, 2010 -+// Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 - // Free Software Foundation, Inc. - // - // This file is part of the GNU ISO C++ Library. This library is free -@@ -84,7 +84,7 @@ - unsigned int __i; - for (__H_nm2 = __H_0, __H_nm1 = __H_1, __i = 2; __i <= __n; ++__i) - { -- __H_n = 2 * (__x * __H_nm1 + (__i - 1) * __H_nm2); -+ __H_n = 2 * (__x * __H_nm1 - (__i - 1) * __H_nm2); - __H_nm2 = __H_nm1; - __H_nm1 = __H_n; - } --- a/src/libstdc++-v3/libsupc++/eh_arm.cc +++ b/src/libstdc++-v3/libsupc++/eh_arm.cc @@ -30,10 +30,11 @@ @@ -43475,578 +42351,3 @@ } return ctm_failed; ---- a/src/libstdc++-v3/testsuite/23_containers/map/modifiers/erase/51142.cc -+++ b/src/libstdc++-v3/testsuite/23_containers/map/modifiers/erase/51142.cc -@@ -0,0 +1,38 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. 
If not see -+// . -+// -+ -+// { dg-do compile } -+// { dg-options "-std=gnu++0x" } -+ -+#include -+ -+struct X -+{ -+ template -+ X(T&) {} -+}; -+ -+bool operator<(const X&, const X&) { return false; } -+ -+// LWG 2059. -+void erasor(std::map& s, X x) -+{ -+ std::map::iterator it = s.find(x); -+ if (it != s.end()) -+ s.erase(it); -+} ---- a/src/libstdc++-v3/testsuite/23_containers/multimap/modifiers/erase/51142.cc -+++ b/src/libstdc++-v3/testsuite/23_containers/multimap/modifiers/erase/51142.cc -@@ -0,0 +1,38 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . -+// -+ -+// { dg-do compile } -+// { dg-options "-std=gnu++0x" } -+ -+#include -+ -+struct X -+{ -+ template -+ X(T&) {} -+}; -+ -+bool operator<(const X&, const X&) { return false; } -+ -+// LWG 2059. -+void erasor(std::multimap& s, X x) -+{ -+ std::multimap::iterator it = s.find(x); -+ if (it != s.end()) -+ s.erase(it); -+} ---- a/src/libstdc++-v3/testsuite/23_containers/multiset/modifiers/erase/51142.cc -+++ b/src/libstdc++-v3/testsuite/23_containers/multiset/modifiers/erase/51142.cc -@@ -0,0 +1,38 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. 
This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . -+// -+ -+// { dg-do compile } -+// { dg-options "-std=gnu++0x" } -+ -+#include -+ -+struct X -+{ -+ template -+ X(T&) {} -+}; -+ -+bool operator<(const X&, const X&) { return false; } -+ -+// LWG 2059. -+void erasor(std::multiset& s, X x) -+{ -+ std::multiset::iterator it = s.find(x); -+ if (it != s.end()) -+ s.erase(it); -+} ---- a/src/libstdc++-v3/testsuite/23_containers/set/modifiers/erase/51142.cc -+++ b/src/libstdc++-v3/testsuite/23_containers/set/modifiers/erase/51142.cc -@@ -0,0 +1,38 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . 
-+// -+ -+// { dg-do compile } -+// { dg-options "-std=gnu++0x" } -+ -+#include -+ -+struct X -+{ -+ template -+ X(T&) {} -+}; -+ -+bool operator<(const X&, const X&) { return false; } -+ -+// LWG 2059. -+void erasor(std::set& s, X x) -+{ -+ std::set::iterator it = s.find(x); -+ if (it != s.end()) -+ s.erase(it); -+} ---- a/src/libstdc++-v3/testsuite/23_containers/unordered_map/erase/51142.cc -+++ b/src/libstdc++-v3/testsuite/23_containers/unordered_map/erase/51142.cc -@@ -0,0 +1,38 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . -+// -+ -+// { dg-do compile } -+// { dg-options "-std=gnu++0x" } -+ -+#include -+ -+struct X -+{ -+ template -+ X(T&) {} -+}; -+ -+bool operator==(const X&, const X&) { return false; } -+ -+// LWG 2059. -+void erasor(std::unordered_map& s, X x) -+{ -+ std::unordered_map::iterator it = s.find(x); -+ if (it != s.end()) -+ s.erase(it); -+} ---- a/src/libstdc++-v3/testsuite/23_containers/unordered_multimap/erase/51142.cc -+++ b/src/libstdc++-v3/testsuite/23_containers/unordered_multimap/erase/51142.cc -@@ -0,0 +1,38 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. 
This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . -+// -+ -+// { dg-do compile } -+// { dg-options "-std=gnu++0x" } -+ -+#include -+ -+struct X -+{ -+ template -+ X(T&) {} -+}; -+ -+bool operator==(const X&, const X&) { return false; } -+ -+// LWG 2059. -+void erasor(std::unordered_multimap& s, X x) -+{ -+ std::unordered_multimap::iterator it = s.find(x); -+ if (it != s.end()) -+ s.erase(it); -+} ---- a/src/libstdc++-v3/testsuite/23_containers/unordered_multiset/erase/51142.cc -+++ b/src/libstdc++-v3/testsuite/23_containers/unordered_multiset/erase/51142.cc -@@ -0,0 +1,38 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . 
-+// -+ -+// { dg-do compile } -+// { dg-options "-std=gnu++0x" } -+ -+#include -+ -+struct X -+{ -+ template -+ X(T&) {} -+}; -+ -+bool operator==(const X&, const X&) { return false; } -+ -+// LWG 2059. -+void erasor(std::unordered_multiset& s, X x) -+{ -+ std::unordered_multiset::iterator it = s.find(x); -+ if (it != s.end()) -+ s.erase(it); -+} ---- a/src/libstdc++-v3/testsuite/23_containers/unordered_set/erase/51142.cc -+++ b/src/libstdc++-v3/testsuite/23_containers/unordered_set/erase/51142.cc -@@ -0,0 +1,38 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . -+// -+ -+// { dg-do compile } -+// { dg-options "-std=gnu++0x" } -+ -+#include -+ -+struct X -+{ -+ template -+ X(T&) {} -+}; -+ -+bool operator==(const X&, const X&) { return false; } -+ -+// LWG 2059. -+void erasor(std::unordered_set& s, X x) -+{ -+ std::unordered_set::iterator it = s.find(x); -+ if (it != s.end()) -+ s.erase(it); -+} ---- a/src/libstdc++-v3/testsuite/26_numerics/complex/50880.cc -+++ b/src/libstdc++-v3/testsuite/26_numerics/complex/50880.cc -@@ -0,0 +1,53 @@ -+// { dg-options "-std=gnu++0x" } -+// -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. 
This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . -+ -+#include -+#include -+ -+template -+ void test01_do() -+ { -+ bool test __attribute__((unused)) = true; -+ -+ const std::complex ca(T(-2), T(2)); -+ const std::complex cb(T(-2), T(0)); -+ const std::complex cc(T(-2), T(-2)); -+ -+ std::complex cra = std::acosh(ca); -+ std::complex crb = std::acosh(cb); -+ std::complex crc = std::acosh(cc); -+ -+ VERIFY( cra.real() > T(0) ); -+ VERIFY( crb.real() > T(0) ); -+ VERIFY( crc.real() > T(0) ); -+ } -+ -+// libstdc++/50880 -+void test01() -+{ -+ test01_do(); -+ test01_do(); -+ test01_do(); -+} -+ -+int main() -+{ -+ test01(); -+ return 0; -+} ---- a/src/libstdc++-v3/testsuite/tr1/8_c_compatibility/complex/50880.cc -+++ b/src/libstdc++-v3/testsuite/tr1/8_c_compatibility/complex/50880.cc -@@ -0,0 +1,51 @@ -+// Copyright (C) 2011 Free Software Foundation, Inc. -+// -+// This file is part of the GNU ISO C++ Library. This library is free -+// software; you can redistribute it and/or modify it under the -+// terms of the GNU General Public License as published by the -+// Free Software Foundation; either version 3, or (at your option) -+// any later version. -+// -+// This library is distributed in the hope that it will be useful, -+// but WITHOUT ANY WARRANTY; without even the implied warranty of -+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+// GNU General Public License for more details. -+// -+// You should have received a copy of the GNU General Public License along -+// with this library; see the file COPYING3. If not see -+// . -+ -+#include -+#include -+ -+template -+ void test01_do() -+ { -+ bool test __attribute__((unused)) = true; -+ -+ const std::complex ca(T(-2), T(2)); -+ const std::complex cb(T(-2), T(0)); -+ const std::complex cc(T(-2), T(-2)); -+ -+ std::complex cra = std::tr1::acosh(ca); -+ std::complex crb = std::tr1::acosh(cb); -+ std::complex crc = std::tr1::acosh(cc); -+ -+ VERIFY( cra.real() > T(0) ); -+ VERIFY( crb.real() > T(0) ); -+ VERIFY( crc.real() > T(0) ); -+ } -+ -+// libstdc++/50880 -+void test01() -+{ -+ test01_do(); -+ test01_do(); -+ test01_do(); -+} -+ -+int main() -+{ -+ test01(); -+ return 0; -+} ---- a/src/libtool.m4 -+++ b/src/libtool.m4 -@@ -2273,7 +2273,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[[123]]*) objformat=aout ;; -+ freebsd[[23]].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -2291,7 +2291,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[[01]]* | freebsdelf3.[[01]]*) -@@ -4804,7 +4804,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - _LT_TAGVAR(hardcode_direct, $1)=yes - _LT_TAGVAR(hardcode_minus_L, $1)=yes -@@ -5751,7 +5751,7 @@ - esac - ;; - -- freebsd[[12]]*) -+ freebsd2.*) - # C++ shared libraries reported to be fairly broken before - # switch to ELF - _LT_TAGVAR(ld_shlibs, $1)=no ---- a/src/lto-plugin/ChangeLog -+++ b/src/lto-plugin/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. 
---- a/src/lto-plugin/configure -+++ b/src/lto-plugin/configure -@@ -8733,7 +8733,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. -- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -9646,7 +9646,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -9664,7 +9664,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) ---- a/src/maintainer-scripts/ChangeLog -+++ b/src/maintainer-scripts/ChangeLog -@@ -1,3 +1,8 @@ -+2011-11-23 Gerald Pfeifer -+ -+ * update_web_docs_svn: Make $DOCSDIR group writable after -+ creating it. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/maintainer-scripts/update_web_docs_svn -+++ b/src/maintainer-scripts/update_web_docs_svn -@@ -93,6 +93,7 @@ - - if [ ! -d $DOCSDIR ]; then - mkdir $DOCSDIR -+ chmod g+w $DOCSDIR - fi - - if [ -z "$RELEASE" ]; then ---- a/src/zlib/ChangeLog -+++ b/src/zlib/ChangeLog -@@ -1,3 +1,7 @@ -+2011-11-20 Andreas Tobler -+ -+ * configure: Regenerate. -+ - 2011-10-26 Release Manager - - * GCC 4.6.2 released. ---- a/src/zlib/configure -+++ b/src/zlib/configure -@@ -8600,7 +8600,7 @@ - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. 
-- freebsd2*) -+ freebsd2.*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes -@@ -9516,7 +9516,7 @@ - objformat=`/usr/bin/objformat` - else - case $host_os in -- freebsd[123]*) objformat=aout ;; -+ freebsd[23].*) objformat=aout ;; - *) objformat=elf ;; - esac - fi -@@ -9534,7 +9534,7 @@ - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in -- freebsd2*) -+ freebsd2.*) - shlibpath_overrides_runpath=yes - ;; - freebsd3.[01]* | freebsdelf3.[01]*) diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-multiarch-doc.diff gcc-4.6-4.6.4/debian/patches/gcc-multiarch-doc.diff --- gcc-4.6-4.6.2/debian/patches/gcc-multiarch-doc.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-multiarch-doc.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,8 +1,101 @@ -# DP: Document -print-multiarch option +# DP: Document multiarch changes ---- a/src/gcc/doc/invoke.texi -+++ b/src/gcc/doc/invoke.texi -@@ -5937,6 +5937,11 @@ +--- a/src/gcc/doc/install.texi (Revision 193696) ++++ b/src/gcc/doc/install.texi (Arbeitskopie) +@@ -1005,6 +1005,14 @@ + workable alternative. This requires gas and gdb, as the normal SVR4 + tools can not generate or interpret stabs. + ++@item --enable-multiarch ++Specify whether to enable or disable multiarch support. The default is ++to check for glibc start files in a multiarch location, and enable it ++if the files are found. The auto detection is enabled for native builds, ++and for cross builds configured with @option{--with-sysroot}. ++More documentation about multiarch can be found at ++@uref{http://wiki.debian.org/Multiarch}. ++ + @item --disable-multilib + Specify that multiple target + libraries to support different target variants, calling +--- a/src/gcc/doc/fragments.texi (Revision 193696) ++++ b/src/gcc/doc/fragments.texi (Arbeitskopie) +@@ -115,6 +115,12 @@ + default value will be @code{MULTILIB_OPTIONS}, with all slashes treated + as spaces. 
+ ++@code{MULTILIB_DIRNAMES} describes the multilib directories using GCC ++conventions and is applied to directories that are part of the GCC ++installation. When multilib-enabled, the compiler will add a ++subdirectory of the form @var{prefix}/@var{multilib} before each ++directory in the search path for libraries and crt files. ++ + For example, if @code{MULTILIB_OPTIONS} is set to @samp{m68000/m68020 + msoft-float}, then the default value of @code{MULTILIB_DIRNAMES} is + @samp{m68000 m68020 msoft-float}. You may specify a different value if +@@ -157,6 +163,60 @@ + you must set this to the directory containing the headers. This value + should match the value of the @code{SYSTEM_INCLUDE_DIR} macro. + ++@findex MULTILIB_OSDIRNAMES ++@item MULTILIB_OSDIRNAMES ++If @code{MULTILIB_OPTIONS} is used, this variable specifies ++a list of subdirectory names, that are used to modify the search ++path depending on the chosen multilib. Unlike @code{MULTILIB_DIRNAMES}, ++@code{MULTILIB_OSDIRNAMES} describes the multilib directories using ++operating systems conventions, and is applied to the directories such as ++@code{lib} or those in the @env{LIBRARY_PATH} environment variable. ++The format is either the same as of ++@code{MULTILIB_DIRNAMES}, or a set of mappings. When it is the same ++as @code{MULTILIB_DIRNAMES}, it describes the multilib directories ++using operating system conventions, rather than GCC conventions. When it is a set ++of mappings of the form @var{gccdir}=@var{osdir}, the left side gives ++the GCC convention and the right gives the equivalent OS defined ++location. If the @var{osdir} part begins with a @samp{!}, ++GCC will not search in the non-multilib directory and use ++exclusively the multilib directory. Otherwise, the compiler will ++examine the search path for libraries and crt files twice; the first ++time it will add @var{multilib} to each directory in the search path, ++the second it will not. 
++ ++For configurations that support both multilib and multiarch, ++@code{MULTILIB_OSDIRNAMES} also encodes the multiarch name, thus ++subsuming @code{MULTIARCH_DIRNAME}. The multiarch name is appended to ++each directory name, separated by a colon (e.g. ++@samp{../lib32:i386-linux-gnu}). ++ ++Each multiarch subdirectory will be searched before the corresponding OS ++multilib directory, for example @samp{/lib/i386-linux-gnu} before ++@samp{/lib/../lib32}. The multiarch name will also be used to modify the ++system header search path, as explained for @code{MULTIARCH_DIRNAME}. ++ ++@findex MULTIARCH_DIRNAME ++@item MULTIARCH_DIRNAME ++This variable specifies the multiarch name for configurations that are ++multiarch-enabled but not multilibbed configurations. ++ ++The multiarch name is used to augment the search path for libraries, crt ++files and system header files with additional locations. The compiler ++will add a multiarch subdirectory of the form ++@var{prefix}/@var{multiarch} before each directory in the library and ++crt search path. It will also add two directories ++@code{LOCAL_INCLUDE_DIR}/@var{multiarch} and ++@code{NATIVE_SYSTEM_HEADER_DIR}/@var{multiarch}) to the system header ++search path, respectively before @code{LOCAL_INCLUDE_DIR} and ++@code{NATIVE_SYSTEM_HEADER_DIR}. ++ ++@code{MULTIARCH_DIRNAME} is not used for configurations that support ++both multilib and multiarch. In that case, multiarch names are encoded ++in @code{MULTILIB_OSDIRNAMES} instead. ++ ++More documentation about multiarch can be found at ++@uref{http://wiki.debian.org/Multiarch}. ++ + @findex SPECS + @item SPECS + Unfortunately, setting @code{MULTILIB_EXTRA_OPTS} is not enough, since +--- a/src/gcc/doc/invoke.texi (Revision 193696) ++++ b/src/gcc/doc/invoke.texi (Arbeitskopie) +@@ -5787,6 +5787,11 @@ @file{../lib32}, or if OS libraries are present in @file{lib/@var{subdir}} subdirectories it prints e.g.@: @file{amd64}, @file{sparcv9} or @file{ev6}. 
diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-multiarch.diff gcc-4.6-4.6.4/debian/patches/gcc-multiarch.diff --- gcc-4.6-4.6.2/debian/patches/gcc-multiarch.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-multiarch.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,589 +1,5 @@ -# DP: Add multiarch support to GCC. -# DP: -# DP: Convert the multilib option to a target triplet, -# DP: add multiarch include directories and libraries path: -# DP: /usr/local/include/-linux-gnu -# DP: /usr/include/-linux-gnu -# DP: /usr/lib/-linux-gnu -# DP: to the system paths. +# DP: Add multiarch support to GCC (chunks not yet applied upstream). -2011-08-18 Matthias Klose - - * doc/invoke.texi: Document -print-multiarch. - * Makefile.in (s-mlib): Pass MULTIARCH_DIRNAME to genmultilib. - * genmultilib: Add new option for the multiarch name. - * gcc.c (multiarch_dir): Define. - (for_each_path): Search for multiarch suffixes. - (driver_handle_option): Handle multiarch option. - (do_spec_1): Pass -imultiarch if defined. - (main): Print multiarch. - (set_multilib_dir): Separate multilib and multiarch names - from multilib_select. - (print_multilib_info): Ignore multiarch names in multilib_select. - * incpath.c (add_standard_paths): Search the multiarch include dirs. - * cppdeault.h (default_include): Document multiarch in multilib - member. - * cppdefault.c: [LOCAL_INCLUDE_DIR, STANDARD_INCLUDE_DIR] Add an - include directory for multiarch directories. - * common.opt: New options --print-multiarch and -imultilib. - * config/s390/t-linux64: Add multiarch names in MULTILIB_OSDIRNAMES. - * config/sparc/t-linux64: Likewise. - * config/powerpc/t-linux64: Likewise. - * config/i386/t-linux64: Likewise. - * config/mips/t-linux64: Likewise. - * config/alpha/t-linux: Define MULTIARCH_DIRNAME. - * config/arm/t-linux: Likewise. - * config/i386/t-linux: Likewise. - * config/pa/t-linux: Likewise. - * config/sparc/t-linux: Likewise. 
- * config/ia64/t-glibc: Define MULTIARCH_DIRNAME for linux target. - - -Index: b/src/gcc/incpath.c -=================================================================== ---- a/src/gcc/incpath.c -+++ b/src/gcc/incpath.c -@@ -150,8 +150,14 @@ - if (!strncmp (p->fname, cpp_GCC_INCLUDE_DIR, len)) - { - char *str = concat (iprefix, p->fname + len, NULL); -- if (p->multilib && imultilib) -+ if (p->multilib == 1 && imultilib) - str = concat (str, dir_separator_str, imultilib, NULL); -+ else if (p->multilib == 2) -+ { -+ if (!imultiarch) -+ continue; -+ str = concat (str, dir_separator_str, imultiarch, NULL); -+ } - add_path (str, SYSTEM, p->cxx_aware, false); - } - } -@@ -195,8 +201,14 @@ - else - str = update_path (p->fname, p->component); - -- if (p->multilib && imultilib) -+ if (p->multilib == 1 && imultilib) - str = concat (str, dir_separator_str, imultilib, NULL); -+ else if (p->multilib == 2) -+ { -+ if (!imultiarch) -+ continue; -+ str = concat (str, dir_separator_str, imultiarch, NULL); -+ } - - add_path (str, SYSTEM, p->cxx_aware, false); - } -Index: b/src/gcc/gcc.c -=================================================================== ---- a/src/gcc/gcc.c -+++ b/src/gcc/gcc.c -@@ -1135,6 +1135,11 @@ - set_multilib_dir based on the compilation options. */ - - static const char *multilib_os_dir; -+ -+/* Subdirectory to use for locating libraries in multiarch conventions. Set by -+ set_multilib_dir based on the compilation options. */ -+ -+static const char *multiarch_dir; - - /* Structure to keep track of the specs that have been defined so far. 
- These are accessed using %(specname) or %[specname] in a compiler -@@ -2048,6 +2053,7 @@ - struct prefix_list *pl; - const char *multi_dir = NULL; - const char *multi_os_dir = NULL; -+ const char *multiarch_suffix = NULL; - const char *multi_suffix; - const char *just_multi_suffix; - char *path = NULL; -@@ -2065,11 +2071,14 @@ - } - if (do_multi && multilib_os_dir && strcmp (multilib_os_dir, ".") != 0) - multi_os_dir = concat (multilib_os_dir, dir_separator_str, NULL); -+ if (multiarch_dir) -+ multiarch_suffix = concat (multiarch_dir, dir_separator_str, NULL); - - while (1) - { - size_t multi_dir_len = 0; - size_t multi_os_dir_len = 0; -+ size_t multiarch_len = 0; - size_t suffix_len; - size_t just_suffix_len; - size_t len; -@@ -2102,16 +2111,15 @@ - multi_dir_len = strlen (multi_dir); - if (multi_os_dir) - multi_os_dir_len = strlen (multi_os_dir); -+ if (multiarch_suffix) -+ multiarch_len = strlen (multiarch_suffix); - suffix_len = strlen (multi_suffix); - just_suffix_len = strlen (just_multi_suffix); - - if (path == NULL) - { - len = paths->max_len + extra_space + 1; -- if (suffix_len > multi_os_dir_len) -- len += suffix_len; -- else -- len += multi_os_dir_len; -+ len += MAX (MAX (suffix_len, multi_os_dir_len), multiarch_len); - path = XNEWVEC (char, len); - } - -@@ -2116,6 +2127,16 @@ - break; - } - -+ /* Now try the multiarch path. */ -+ if (!skip_multi_dir -+ && !pl->require_machine_suffix && multiarch_dir) -+ { -+ memcpy (path + len, multiarch_suffix, multiarch_len + 1); -+ ret = callback (path, callback_info); -+ if (ret) -+ break; -+ } -+ - /* Now try the base path. */ - if (!pl->require_machine_suffix - && !(pl->os_multilib ? 
skip_multi_os_dir : skip_multi_dir)) -@@ -3217,6 +3238,7 @@ - case OPT_print_multi_directory: - case OPT_print_sysroot: - case OPT_print_multi_os_directory: -+ case OPT_print_multiarch: - case OPT_print_sysroot_headers_suffix: - case OPT_time: - case OPT_wrapper: -@@ -4867,6 +4889,15 @@ - do_spec_1 (" ", 0, NULL); - } - -+ if (multiarch_dir) -+ { -+ do_spec_1 ("-imultiarch", 1, NULL); -+ /* Make this a separate argument. */ -+ do_spec_1 (" ", 0, NULL); -+ do_spec_1 (multiarch_dir, 1, NULL); -+ do_spec_1 (" ", 0, NULL); -+ } -+ - if (gcc_exec_prefix) - { - do_spec_1 ("-iprefix", 1, NULL); -@@ -6812,6 +6843,15 @@ - return (0); - } - -+ if (print_multiarch) -+ { -+ if (multiarch_dir == NULL) -+ printf ("\n"); -+ else -+ printf ("%s\n", multiarch_dir); -+ return (0); -+ } -+ - if (print_sysroot) - { - if (target_system_root) -@@ -7787,10 +7827,26 @@ - q++; - if (q < end) - { -- char *new_multilib_os_dir = XNEWVEC (char, end - q); -+ const char *q2 = q + 1; -+ char *new_multilib_os_dir; -+ -+ while (q2 < end && *q2 != ':') -+ q2++; -+ if (*q2 == ':') -+ end = q2; -+ new_multilib_os_dir = XNEWVEC (char, end - q); - memcpy (new_multilib_os_dir, q + 1, end - q - 1); - new_multilib_os_dir[end - q - 1] = '\0'; - multilib_os_dir = new_multilib_os_dir; -+ -+ end = this_path + this_path_len; -+ if (q2 < end && *q2 == ':') -+ { -+ char *new_multiarch_dir = XNEWVEC (char, end - q2); -+ memcpy (new_multiarch_dir, q2 + 1, end - q2 - 1); -+ new_multiarch_dir[end - q2 - 1] = '\0'; -+ multiarch_dir = new_multiarch_dir; -+ } - break; - } - } -@@ -7852,7 +7908,7 @@ - /* When --disable-multilib was used but target defines - MULTILIB_OSDIRNAMES, entries starting with .: are there just - to find multilib_os_dir, so skip them from output. */ -- if (this_path[0] == '.' && this_path[1] == ':') -+ if (this_path[0] == '.' && this_path[1] == ':' && this_path[2] != '.' && this_path[3] != ':') - skip = 1; - - /* Check for matches with the multilib_exclusions. 
We don't bother -Index: b/src/gcc/genmultilib -=================================================================== ---- a/src/gcc/genmultilib -+++ b/src/gcc/genmultilib -@@ -73,6 +73,8 @@ - # the os directory names are used exclusively. Use the mapping when - # there is no one-to-one equivalence between GCC levels and the OS. - -+# The optional eight argument is the multiarch name. -+ - # The last option should be "yes" if multilibs are enabled. If it is not - # "yes", all GCC multilib dir names will be ".". - -@@ -121,7 +123,8 @@ - extra=$5 - exclusions=$6 - osdirnames=$7 --enable_multilib=$8 -+multiarch=$8 -+enable_multilib=$9 - - echo "static const char *const multilib_raw[] = {" - -@@ -222,6 +225,9 @@ - # names. - toosdirnames= - defaultosdirname= -+if [ -n "${multiarch}" ]; then -+ defaultosdirname=:.:${multiarch} -+fi - if [ -n "${osdirnames}" ]; then - set x ${osdirnames} - shift -@@ -229,6 +235,9 @@ - case "$1" in - .=*) - defaultosdirname=`echo $1 | sed 's|^.=|:|'` -+ if [ -n "${multiarch}" ]; then -+ defaultosdirname=${defaultosdirname}:${multiarch} -+ fi - shift - ;; - *=*) -@@ -314,13 +323,13 @@ - dirout=`echo ${combo} | sed -e 's/=/-/g'` - fi - # Remove the leading and trailing slashes. -- dirout=`echo ${dirout} | sed -e 's|^/||' -e 's|/$||g'` -+ dirout=`echo ${dirout} | sed -e 's|^/||' -e 's|/*:/*|:|' -e 's|/$||g'` - - # Use the OS directory names rather than the option names. - if [ -n "${toosdirnames}" ]; then - osdirout=`echo ${combo} | sed ${toosdirnames}` - # Remove the leading and trailing slashes. 
-- osdirout=`echo ${osdirout} | sed -e 's|^/||' -e 's|/$||g'` -+ osdirout=`echo ${osdirout} | sed -e 's|^/||' -e 's|/*:/*|:|' -e 's|/$||g'` - if [ "x${enable_multilib}" != xyes ]; then - dirout=".:${osdirout}" - disable_multilib=yes -Index: b/src/gcc/cppdefault.c -=================================================================== ---- a/src/gcc/cppdefault.c -+++ b/src/gcc/cppdefault.c -@@ -64,6 +64,7 @@ - #endif - #ifdef LOCAL_INCLUDE_DIR - /* /usr/local/include comes before the fixincluded header files. */ -+ { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 2 }, - { LOCAL_INCLUDE_DIR, 0, 0, 1, 1, 0 }, - #endif - #ifdef PREFIX_INCLUDE_DIR -@@ -95,6 +96,7 @@ - #endif - #ifdef STANDARD_INCLUDE_DIR - /* /usr/include comes dead last. */ -+ { STANDARD_INCLUDE_DIR, STANDARD_INCLUDE_COMPONENT, 0, 0, 1, 2 }, - { STANDARD_INCLUDE_DIR, STANDARD_INCLUDE_COMPONENT, 0, 0, 1, 0 }, - #endif - { 0, 0, 0, 0, 0, 0 } -Index: b/src/gcc/cppdefault.h -=================================================================== ---- a/src/gcc/cppdefault.h -+++ b/src/gcc/cppdefault.h -@@ -43,9 +43,11 @@ - C++. */ - const char add_sysroot; /* FNAME should be prefixed by - cpp_SYSROOT. */ -- const char multilib; /* FNAME should have the multilib path -- specified with -imultilib -- appended. */ -+ const char multilib; /* FNAME should have appended -+ - the multilib path specified with -imultilib -+ when 1 is passed, -+ - the multiarch path specified with -+ -imultiarch, when 2 is passed. 
*/ - }; - - extern const struct default_include cpp_include_defaults[]; -Index: b/src/gcc/common.opt -=================================================================== ---- a/src/gcc/common.opt -+++ b/src/gcc/common.opt -@@ -334,6 +334,9 @@ - -print-multi-os-directory - Driver Alias(print-multi-os-directory) - -+-print-multiarch -+Driver Alias(print-multiarch) -+ - -print-prog-name - Driver Separate Alias(print-prog-name=) - -@@ -2190,6 +2193,10 @@ - Common Joined Var(plugindir_string) Init(0) - -iplugindir= Set to be the default plugin directory - -+imultiarch -+Common Joined Separate RejectDriver Var(imultiarch) Init(0) -+-imultiarch Set to be the multiarch include subdirectory -+ - l - Driver Joined Separate - -@@ -2247,6 +2254,9 @@ - - print-multi-os-directory - Driver Var(print_multi_os_directory) -+ -+print-multiarch -+Driver Var(print_multiarch) - - print-prog-name= - Driver JoinedOrMissing Var(print_prog_name) -Index: b/src/gcc/Makefile.in -=================================================================== ---- a/src/gcc/Makefile.in -+++ b/src/gcc/Makefile.in -@@ -338,6 +338,8 @@ - - enable_plugin = @enable_plugin@ - -+with_float = @with_float@ -+ - CPPLIB = ../libcpp/libcpp.a - CPPINC = -I$(srcdir)/../libcpp/include - -@@ -1943,10 +1945,11 @@ - "$(MULTILIB_EXTRA_OPTS)" \ - "$(MULTILIB_EXCLUSIONS)" \ - "$(MULTILIB_OSDIRNAMES)" \ -+ "$(MULTIARCH_DIRNAME)" \ - "@enable_multilib@" \ - > tmp-mlib.h; \ - else \ -- $(SHELL) $(srcdir)/genmultilib '' '' '' '' '' '' '' no \ -+ $(SHELL) $(srcdir)/genmultilib '' '' '' '' '' '' '' "$(MULTIARCH_DIRNAME)" no \ - > tmp-mlib.h; \ - fi - $(SHELL) $(srcdir)/../move-if-change tmp-mlib.h multilib.h -Index: b/src/gcc/config/alpha/t-linux -=================================================================== ---- a/src/gcc/config/alpha/t-linux -+++ b/src/gcc/config/alpha/t-linux -@@ -1 +1,3 @@ - SHLIB_MAPFILES += $(srcdir)/config/alpha/libgcc-alpha-ldbl.ver -+ -+MULTIARCH_DIRNAME = alpha-linux-gnu -Index: 
b/src/gcc/config/s390/t-linux64 -=================================================================== ---- a/src/gcc/config/s390/t-linux64 -+++ b/src/gcc/config/s390/t-linux64 -@@ -7,4 +7,4 @@ - - MULTILIB_OPTIONS = m64/m31 - MULTILIB_DIRNAMES = 64 32 --MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib) -+MULTILIB_OSDIRNAMES = ../lib64:s390x-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):s390-linux-gnu -Index: b/src/gcc/config/sparc/t-linux64 -=================================================================== ---- a/src/gcc/config/sparc/t-linux64 -+++ b/src/gcc/config/sparc/t-linux64 -@@ -26,7 +26,7 @@ - - MULTILIB_OPTIONS = m64/m32 - MULTILIB_DIRNAMES = 64 32 --MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib) -+MULTILIB_OSDIRNAMES = ../lib64:sparc64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):sparc-linux-gnu - - LIBGCC = stmp-multilib - INSTALL_LIBGCC = install-multilib -Index: b/src/gcc/config/sparc/t-linux -=================================================================== ---- a/src/gcc/config/sparc/t-linux -+++ b/src/gcc/config/sparc/t-linux -@@ -3,3 +3,5 @@ - # Avoid the t-linux version file. 
- SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver \ - $(srcdir)/config/sparc/libgcc-sparc-glibc.ver -+ -+MULTIARCH_DIRNAME = sparc-linux-gnu -Index: b/src/gcc/config/i386/t-linux -=================================================================== ---- a/src/gcc/config/i386/t-linux -+++ b/src/gcc/config/i386/t-linux -@@ -3,3 +3,5 @@ - # t-slibgcc-elf-ver and t-linux - SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver \ - $(srcdir)/config/i386/libgcc-glibc.ver -+ -+MULTIARCH_DIRNAME = i386-linux-gnu -Index: b/src/gcc/config/i386/t-linux64 -=================================================================== ---- a/src/gcc/config/i386/t-linux64 -+++ b/src/gcc/config/i386/t-linux64 -@@ -25,7 +25,11 @@ - - MULTILIB_OPTIONS = m64/m32 - MULTILIB_DIRNAMES = 64 32 --MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib) -+MULTILIB_OSDIRNAMES = ../lib64:x86_64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):i386-linux-gnu -+ -+ifneq (,$(findstring kfreebsd, $(target))) -+ MULTILIB_OSDIRNAMES := $(subst linux,kfreebsd,$(MULTILIB_OSDIRNAMES)) -+endif - - LIBGCC = stmp-multilib - INSTALL_LIBGCC = install-multilib -Index: b/src/gcc/config/ia64/t-glibc -=================================================================== ---- a/src/gcc/config/ia64/t-glibc -+++ b/src/gcc/config/ia64/t-glibc -@@ -3,3 +3,7 @@ - $(srcdir)/unwind-compat.c - - SHLIB_MAPFILES += $(srcdir)/config/ia64/libgcc-glibc.ver -+ -+ifneq (,$(findstring linux, $(target))) -+MULTIARCH_DIRNAME = ia64-linux-gnu -+endif -Index: b/src/gcc/config/m68k/t-linux -=================================================================== ---- a/src/gcc/config/m68k/t-linux -+++ b/src/gcc/config/m68k/t-linux -@@ -21,6 +21,9 @@ - # Only include multilibs for 680x0 CPUs with an MMU. 
- M68K_MLIB_CPU += && (CPU ~ "^m680") && (FLAGS ~ "FL_MMU") - -+MULTILIB_OSDIRNAMES = m68k-linux-gnu:m68k-linux-gnu -+MULTIARCH_DIRNAME = m68k-linux-gnu -+ - # This rule uses MULTILIB_MATCHES to generate a definition of - # SYSROOT_SUFFIX_SPEC. - sysroot-suffix.h: $(srcdir)/config/m68k/print-sysroot-suffix.sh -Index: b/src/gcc/config/rs6000/t-linux64 -=================================================================== ---- a/src/gcc/config/rs6000/t-linux64 -+++ b/src/gcc/config/rs6000/t-linux64 -@@ -36,7 +36,7 @@ - MULTILIB_EXTRA_OPTS = fPIC mstrict-align - MULTILIB_EXCEPTIONS = m64/msoft-float - MULTILIB_EXCLUSIONS = m64/!m32/msoft-float --MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib) nof -+MULTILIB_OSDIRNAMES = ../lib64:powerpc64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):powerpc-linux-gnu nof - MULTILIB_MATCHES = $(MULTILIB_MATCHES_FLOAT) - - softfp_wrap_start := '\#ifndef __powerpc64__' -Index: b/src/gcc/config/pa/t-linux -=================================================================== ---- a/src/gcc/config/pa/t-linux -+++ b/src/gcc/config/pa/t-linux -@@ -35,3 +35,5 @@ - - # Compile crtbeginS.o and crtendS.o as PIC. 
- CRTSTUFF_T_CFLAGS_S = -fPIC -+ -+MULTIARCH_DIRNAME = hppa-linux-gnu -Index: b/src/gcc/config/mips/t-linux64 -=================================================================== ---- a/src/gcc/config/mips/t-linux64 -+++ b/src/gcc/config/mips/t-linux64 -@@ -18,7 +18,11 @@ - - MULTILIB_OPTIONS = mabi=n32/mabi=32/mabi=64 - MULTILIB_DIRNAMES = n32 32 64 --MULTILIB_OSDIRNAMES = ../lib32 ../lib ../lib64 -+MIPS_EL = $(if $(filter %el, $(firstword $(subst -, ,$(target)))),el) -+MULTILIB_OSDIRNAMES = \ -+ ../lib32:mips64$(MIPS_EL)-linux-gnuabin32 \ -+ ../lib:mips$(MIPS_EL)-linux-gnu \ -+ ../lib64:mips64$(MIPS_EL)-linux-gnuabi64 - - EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o - -Index: b/src/gcc/config.gcc -=================================================================== ---- a/src/gcc/config.gcc -+++ b/src/gcc/config.gcc -@@ -2180,6 +2180,7 @@ - ;; - *) - tm_file="${tm_file} rs6000/linux.h glibc-stdint.h" -+ tmake_file="$tmake_file rs6000/t-linux" - ;; - esac - tmake_file="${tmake_file} t-slibgcc-libgcc rs6000/t-fprules-softfp soft-fp/t-softfp" -@@ -2311,6 +2312,8 @@ - tm_file="s390/s390.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h s390/linux.h" - if test x$enable_targets = xall; then - tmake_file="${tmake_file} s390/t-linux64" -+ else -+ tmake_file="${tmake_file} s390/t-linux" - fi - ;; - s390x-*-linux*) -@@ -3674,6 +3677,14 @@ - i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu | \ - i[34567]86-*-gnu*) - tmake_file="${tmake_file} i386/t-fprules-softfp soft-fp/t-softfp i386/t-linux" -+ case ${target} in -+ i[34567]86-*-linux* | x86_64-*-linux*) -+ ;; -+ i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu) -+ tmake_file="${tmake_file} i386/t-kfreebsd";; -+ i[34567]86-*-gnu*) -+ tmake_file="${tmake_file} i386/t-gnu";; -+ esac - ;; - i[34567]86-*-solaris2*) - tmake_file="${tmake_file} i386/t-fprules-softfp soft-fp/t-softfp" -Index: b/src/gcc/config/i386/t-gnu -=================================================================== ---- 
/dev/null -+++ b/src/gcc/config/i386/t-gnu -@@ -0,0 +1 @@ -+MULTIARCH_DIRNAME = i386-gnu -Index: b/src/gcc/config/i386/t-kfreebsd -=================================================================== ---- /dev/null -+++ b/src/gcc/config/i386/t-kfreebsd -@@ -0,0 +1 @@ -+MULTIARCH_DIRNAME = i386-kfreebsd-gnu -Index: b/src/gcc/config/arm/t-linux-eabi -=================================================================== ---- a/src/gcc/config/arm/t-linux-eabi -+++ b/src/gcc/config/arm/t-linux-eabi -@@ -42,6 +42,8 @@ - #MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te - #MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* - -+MULTIARCH_DIRNAME = arm-linux-gnueabi$(if $(filter hard,$(with_float)),hf) -+ - # Use a version of div0 which raises SIGFPE, and a special __clear_cache. - LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache - -Index: b/src/gcc/config/sh/t-linux -=================================================================== --- a/src/gcc/config/sh/t-linux +++ b/src/gcc/config/sh/t-linux @@ -6,3 +6,5 @@ @@ -592,45 +8,21 @@ EXTRA_MULTILIB_PARTS= crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o + +MULTILIB_OSDIRNAMES = sh4-linux-gnu:sh4-linux-gnu sh4_nofpu-linux-gnu:sh4-linux-gnu -Index: b/src/gcc/configure.ac -=================================================================== ---- a/src/gcc/configure.ac -+++ b/src/gcc/configure.ac -@@ -606,6 +606,9 @@ - [], [enable_multilib=yes]) - AC_SUBST(enable_multilib) - -+# needed for ARM multiarch name -+AC_SUBST(with_float) -+ - # Enable __cxa_atexit for C++. 
- AC_ARG_ENABLE(__cxa_atexit, - [ --enable-__cxa_atexit enable __cxa_atexit for C++], -Index: b/src/gcc/config/rs6000/t-linux -=================================================================== ---- /dev/null -+++ b/src/gcc/config/rs6000/t-linux -@@ -0,0 +1 @@ -+MULTIARCH_DIRNAME = powerpc-linux-gnu -Index: b/src/gcc/config/s390/t-linux -=================================================================== --- /dev/null +++ b/src/gcc/config/s390/t-linux @@ -0,0 +1 @@ +MULTIARCH_DIRNAME = s390-linux-gnu ---- a/src/gcc/config.gcc -+++ b/src/gcc/config.gcc -@@ -2188,7 +2188,8 @@ - powerpc*-*-linux*altivec*) - tm_file="${tm_file} rs6000/linuxaltivec.h" ;; - powerpc*-*-linux*spe*) -- tm_file="${tm_file} rs6000/linuxspe.h rs6000/e500.h" ;; -+ tm_file="${tm_file} rs6000/linuxspe.h rs6000/e500.h" -+ tmake_file="${tmake_file} rs6000/t-linux-spe" ;; - powerpc*-*-linux*paired*) - tm_file="${tm_file} rs6000/750cl.h" ;; - esac ---- a/src/gcc/config/rs6000/t-linux-spe -+++ b/src/gcc/config/rs6000/t-linux-spe -@@ -0,0 +1 @@ -+MULTIARCH_DIRNAME = powerpc-linux-gnuspe +--- a/src/libstdc++-v3/python/hook.in ++++ b/src/libstdc++-v3/python/hook.in +@@ -47,7 +47,10 @@ + libdir = libdir[len (prefix):] + + # Compute the ".."s needed to get from libdir to the prefix. +- dotdots = ('..' + os.sep) * len (libdir.split (os.sep)) ++ backdirs = len (libdir.split (os.sep)) ++ if not os.path.basename(os.path.dirname(__file__)).startswith('lib'): ++ backdirs += 1 # multiarch subdir ++ dotdots = ('..' 
+ os.sep) * backdirs + + objfile = gdb.current_objfile ().filename + dir_ = os.path.join (os.path.dirname (objfile), dotdots, pythondir) diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-multilib64-multiarch.diff gcc-4.6-4.6.4/debian/patches/gcc-multilib64-multiarch.diff --- gcc-4.6-4.6.2/debian/patches/gcc-multilib64-multiarch.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-multilib64-multiarch.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,52 +1,53 @@ # DP: Use lib instead of lib64 as the 64bit system dir on biarch # DP: architectures defaulting to 64bit. -Index: gcc-4.6-4.6.1/src/gcc/config/s390/t-linux64 +Index: b/src/gcc/config/s390/t-linux64 =================================================================== ---- gcc-4.6-4.6.1.orig/src/gcc/config/s390/t-linux64 2011-08-20 16:05:05.000000000 +0000 -+++ gcc-4.6-4.6.1/src/gcc/config/s390/t-linux64 2011-08-20 16:08:42.440221785 +0000 -@@ -7,4 +7,4 @@ +--- a/src/gcc/config/s390/t-linux64 ++++ b/src/gcc/config/s390/t-linux64 +@@ -7,5 +7,5 @@ MULTILIB_OPTIONS = m64/m31 MULTILIB_DIRNAMES = 64 32 --MULTILIB_OSDIRNAMES = ../lib64:s390x-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):s390-linux-gnu -+MULTILIB_OSDIRNAMES = ../lib:s390x-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):s390-linux-gnu -Index: gcc-4.6-4.6.1/src/gcc/config/rs6000/t-linux64 +-MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:s390x-linux-gnu) ++MULTILIB_OSDIRNAMES = ../lib$(call if_multiarch,:s390x-linux-gnu) + MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:s390-linux-gnu) +Index: b/src/gcc/config/rs6000/t-linux64 =================================================================== ---- gcc-4.6-4.6.1.orig/src/gcc/config/rs6000/t-linux64 2011-08-20 16:08:08.000000000 +0000 -+++ gcc-4.6-4.6.1/src/gcc/config/rs6000/t-linux64 2011-08-20 16:09:43.770311733 
+0000 +--- a/src/gcc/config/rs6000/t-linux64 ++++ b/src/gcc/config/rs6000/t-linux64 @@ -34,7 +34,7 @@ MULTILIB_OPTIONS = m64/m32 MULTILIB_DIRNAMES = 64 32 MULTILIB_EXTRA_OPTS = fPIC mstrict-align --MULTILIB_OSDIRNAMES = ../lib64:powerpc64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):powerpc-linux-gnu -+MULTILIB_OSDIRNAMES = ../lib:powerpc64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):powerpc-linux-gnu +-MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:powerpc64-linux-gnu) ++MULTILIB_OSDIRNAMES = ../lib$(call if_multiarch,:powerpc64-linux-gnu) + MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu) + MULTILIB_MATCHES = $(MULTILIB_MATCHES_FLOAT) - softfp_wrap_start := '\#ifndef __powerpc64__' - softfp_wrap_end := '\#endif' -Index: gcc-4.6-4.6.1/src/gcc/config/sparc/t-linux64 +Index: b/src/gcc/config/sparc/t-linux64 =================================================================== ---- gcc-4.6-4.6.1.orig/src/gcc/config/sparc/t-linux64 2011-08-20 16:05:05.000000000 +0000 -+++ gcc-4.6-4.6.1/src/gcc/config/sparc/t-linux64 2011-08-20 16:08:42.440221785 +0000 +--- a/src/gcc/config/sparc/t-linux64 ++++ b/src/gcc/config/sparc/t-linux64 @@ -26,7 +26,7 @@ MULTILIB_OPTIONS = m64/m32 MULTILIB_DIRNAMES = 64 32 --MULTILIB_OSDIRNAMES = ../lib64:sparc64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):sparc-linux-gnu -+MULTILIB_OSDIRNAMES = ../lib:sparc64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):sparc-linux-gnu +-MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:sparc64-linux-gnu) ++MULTILIB_OSDIRNAMES = ../lib$(call if_multiarch,:sparc64-linux-gnu) + MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call 
if_multiarch,:sparc-linux-gnu) LIBGCC = stmp-multilib - INSTALL_LIBGCC = install-multilib -Index: gcc-4.6-4.6.1/src/gcc/config/i386/t-linux64 +Index: b/src/gcc/config/i386/t-linux64 =================================================================== ---- gcc-4.6-4.6.1.orig/src/gcc/config/i386/t-linux64 2011-08-20 16:06:59.000000000 +0000 -+++ gcc-4.6-4.6.1/src/gcc/config/i386/t-linux64 2011-08-20 16:08:42.440221785 +0000 +--- a/src/gcc/config/i386/t-linux64 ++++ b/src/gcc/config/i386/t-linux64 @@ -25,7 +25,7 @@ MULTILIB_OPTIONS = m64/m32 MULTILIB_DIRNAMES = 64 32 --MULTILIB_OSDIRNAMES = ../lib64:x86_64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):i386-linux-gnu -+MULTILIB_OSDIRNAMES = ../lib:x86_64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):i386-linux-gnu +-MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:x86_64-linux-gnu) \ ++MULTILIB_OSDIRNAMES = ../lib$(call if_multiarch,:x86_64-linux-gnu) \ + $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:i386-linux-gnu) - ifneq (,$(findstring kfreebsd, $(target))) - MULTILIB_OSDIRNAMES := $(subst linux,kfreebsd,$(MULTILIB_OSDIRNAMES)) + LIBGCC = stmp-multilib diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-powerpc-nof.diff gcc-4.6-4.6.4/debian/patches/gcc-powerpc-nof.diff --- gcc-4.6-4.6.2/debian/patches/gcc-powerpc-nof.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-powerpc-nof.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,10 +1,10 @@ # DP: Don't build nof multlib on powerpc. 
-Index: gcc-4.6-4.6.1/src/gcc/config/rs6000/t-linux64 +Index: b/src/gcc/config/rs6000/t-linux64 =================================================================== ---- gcc-4.6-4.6.1.orig/src/gcc/config/rs6000/t-linux64 2011-08-20 16:05:05.000000000 +0000 -+++ gcc-4.6-4.6.1/src/gcc/config/rs6000/t-linux64 2011-08-20 16:08:08.900172593 +0000 -@@ -31,13 +31,10 @@ +--- a/src/gcc/config/rs6000/t-linux64 ++++ b/src/gcc/config/rs6000/t-linux64 +@@ -31,14 +31,11 @@ # it doesn't tell anything about the 32bit libraries on those systems. Set # MULTILIB_OSDIRNAMES according to what is found on the target. @@ -15,9 +15,9 @@ MULTILIB_EXTRA_OPTS = fPIC mstrict-align -MULTILIB_EXCEPTIONS = m64/msoft-float -MULTILIB_EXCLUSIONS = m64/!m32/msoft-float --MULTILIB_OSDIRNAMES = ../lib64:powerpc64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):powerpc-linux-gnu nof --MULTILIB_MATCHES = $(MULTILIB_MATCHES_FLOAT) -+MULTILIB_OSDIRNAMES = ../lib64:powerpc64-linux-gnu $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib):powerpc-linux-gnu + MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:powerpc64-linux-gnu) + MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu) +-MULTILIB_OSDIRNAMES += nof + MULTILIB_MATCHES = $(MULTILIB_MATCHES_FLOAT) softfp_wrap_start := '\#ifndef __powerpc64__' - softfp_wrap_end := '\#endif' diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-speed-up-insn-attrtab.diff gcc-4.6-4.6.4/debian/patches/gcc-speed-up-insn-attrtab.diff --- gcc-4.6-4.6.2/debian/patches/gcc-speed-up-insn-attrtab.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-speed-up-insn-attrtab.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,433 @@ +# DP: speed up genattrtab (3/3) + +[backport from gcc-4.8/trunk r187714 ] + +Date: Mon, 7 May 2012 15:11:16 +0200 (CEST) +From: Michael Matz +Subject: Speed up 
insn-attrtab.c compilation +List-Archive: + +Hi, + +neverending story. Maybe this time something gets in :) + +This patch changes generation of insn-attrtab.c to: +* order attributes topologically (so that the "inlining" genattrtab does + is as effective as possible, and doesn't hit the size limits too much) +* adjusts the rtx_cost for the attribute tests to correctly account for + all tests (even the cheap ones have an impact after all) +* lowers the limits for inlining +* only uses an "optimized" new value if it actually is cheaper than + the old value (and overall not too expensive) +* limits the number of predicates we remember from if/elseif/elseif + cascades to optimize further elseif tests (doesn't change code on x86, + but reduces generation time) + +The effect is that insn-attrtab.c on x86_64 is 2.9 MB instead of 6.0 MB, +and that compilation of insn-attrtab.c with an unoptimized (i.e. stage1) +cc1 is done in 45 seconds instead of 269 seconds. The genattrtab call +itself (unoptimized genattrtab) takes 9.5 seconds instead of 9.1. + +This was before Steven implemented the three file split, with that split +take the above as cumulative time, the overall speedup transfers to the +split version. + +Compilation time on x86_64-linux, with an unoptimized cc1(plus), with -g +-O2: + unpatched patched +big-code.c 157.1s 157.4s +kdecore.cc 304.3s 301.9s + +(i.e. noise). + +In particular this patch doesn't contain my controversial variant of +caching get_attr_xxx calls (which dynamically calls those functions more +often than Jakubs variant). Still a good speedup, though. + +Regstrapping on x86_64-linux in progress. Okay if that passes? (I'd like +to retain the #if 0 code therein, it's just a generator and it helps +easily debugging the topsort thingy if something breaks) + + +Ciao, +Michael. + +gcc/ + +2012-05-21 Michael Matz + + * genattrtab.c (attr_rtx_cost): Move earlier, start with cost being 1. 
+ (simplify_test_exp): Handle one more case of distributive law, + decrease cost threshold. + (tests_attr_p, get_attr_order): New functions. + (optimize_attrs): Use topological order, inline only cheap values. + (write_attr_set): Reset our_known_true after some time. + +--- a/src/gcc/genattrtab.c ++++ b/src/gcc/genattrtab.c +@@ -115,6 +115,8 @@ along with GCC; see the file COPYING3. + #include "gensupport.h" + #include "vecprim.h" + ++#define DEBUG 0 ++ + /* Flags for make_internal_attr's `special' parameter. */ + #define ATTR_NONE 0 + #define ATTR_SPECIAL (1 << 0) +@@ -1640,6 +1642,57 @@ write_length_unit_log (FILE *outf) + fprintf (outf, "EXPORTED_CONST int length_unit_log = %u;\n", length_unit_log); + } + ++/* Compute approximate cost of the expression. Used to decide whether ++ expression is cheap enough for inline. */ ++static int ++attr_rtx_cost (rtx x) ++{ ++ int cost = 1; ++ enum rtx_code code; ++ if (!x) ++ return 0; ++ code = GET_CODE (x); ++ switch (code) ++ { ++ case MATCH_OPERAND: ++ if (XSTR (x, 1)[0]) ++ return 10; ++ else ++ return 1; ++ ++ case EQ_ATTR_ALT: ++ return 1; ++ ++ case EQ_ATTR: ++ /* Alternatives don't result into function call. */ ++ if (!strcmp_check (XSTR (x, 0), alternative_name)) ++ return 1; ++ else ++ return 5; ++ default: ++ { ++ int i, j; ++ const char *fmt = GET_RTX_FORMAT (code); ++ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) ++ { ++ switch (fmt[i]) ++ { ++ case 'V': ++ case 'E': ++ for (j = 0; j < XVECLEN (x, i); j++) ++ cost += attr_rtx_cost (XVECEXP (x, i, j)); ++ break; ++ case 'e': ++ cost += attr_rtx_cost (XEXP (x, i)); ++ break; ++ } ++ } ++ } ++ break; ++ } ++ return cost; ++} ++ + /* Take a COND expression and see if any of the conditions in it can be + simplified. If any are known true or known false for the particular insn + code, the COND can be further simplified. +@@ -2235,57 +2288,6 @@ simplify_or_tree (rtx exp, rtx *pterm, i + return exp; + } + +-/* Compute approximate cost of the expression. 
Used to decide whether +- expression is cheap enough for inline. */ +-static int +-attr_rtx_cost (rtx x) +-{ +- int cost = 0; +- enum rtx_code code; +- if (!x) +- return 0; +- code = GET_CODE (x); +- switch (code) +- { +- case MATCH_OPERAND: +- if (XSTR (x, 1)[0]) +- return 10; +- else +- return 0; +- +- case EQ_ATTR_ALT: +- return 0; +- +- case EQ_ATTR: +- /* Alternatives don't result into function call. */ +- if (!strcmp_check (XSTR (x, 0), alternative_name)) +- return 0; +- else +- return 5; +- default: +- { +- int i, j; +- const char *fmt = GET_RTX_FORMAT (code); +- for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) +- { +- switch (fmt[i]) +- { +- case 'V': +- case 'E': +- for (j = 0; j < XVECLEN (x, i); j++) +- cost += attr_rtx_cost (XVECEXP (x, i, j)); +- break; +- case 'e': +- cost += attr_rtx_cost (XEXP (x, i)); +- break; +- } +- } +- } +- break; +- } +- return cost; +-} +- + /* Simplify test expression and use temporary obstack in order to avoid + memory bloat. Use ATTR_IND_SIMPLIFIED to avoid unnecessary simplifications + and avoid unnecessary copying if possible. */ +@@ -2618,6 +2620,25 @@ simplify_test_exp (rtx exp, int insn_cod + return SIMPLIFY_TEST_EXP (newexp, insn_code, insn_index); + } + ++ /* Similarly, ++ convert (ior (and (y) (x)) ++ (and (z) (x))) ++ to (and (ior (y) (z)) ++ (x)) ++ Note that we want the common term to stay at the end. ++ */ ++ ++ else if (GET_CODE (left) == AND && GET_CODE (right) == AND ++ && attr_equal_p (XEXP (left, 1), XEXP (right, 1))) ++ { ++ newexp = attr_rtx (IOR, XEXP (left, 0), XEXP (right, 0)); ++ ++ left = newexp; ++ right = XEXP (right, 1); ++ newexp = attr_rtx (AND, left, right); ++ return SIMPLIFY_TEST_EXP (newexp, insn_code, insn_index); ++ } ++ + /* See if all or all but one of the insn's alternatives are specified + in this tree. Optimize if so. 
*/ + +@@ -2753,7 +2774,7 @@ simplify_test_exp (rtx exp, int insn_cod + x = evaluate_eq_attr (exp, attr, av->value, + insn_code, insn_index); + x = SIMPLIFY_TEST_EXP (x, insn_code, insn_index); +- if (attr_rtx_cost(x) < 20) ++ if (attr_rtx_cost(x) < 7) + return x; + } + } +@@ -2773,6 +2794,133 @@ simplify_test_exp (rtx exp, int insn_cod + return newexp; + } + ++/* Return 1 if any EQ_ATTR subexpression of P refers to ATTR, ++ otherwise return 0. */ ++ ++static int ++tests_attr_p (rtx p, struct attr_desc *attr) ++{ ++ const char *fmt; ++ int i, ie, j, je; ++ ++ if (GET_CODE (p) == EQ_ATTR) ++ { ++ if (XSTR (p, 0) != attr->name) ++ return 0; ++ return 1; ++ } ++ ++ fmt = GET_RTX_FORMAT (GET_CODE (p)); ++ ie = GET_RTX_LENGTH (GET_CODE (p)); ++ for (i = 0; i < ie; i++) ++ { ++ switch (*fmt++) ++ { ++ case 'e': ++ if (tests_attr_p (XEXP (p, i), attr)) ++ return 1; ++ break; ++ ++ case 'E': ++ je = XVECLEN (p, i); ++ for (j = 0; j < je; ++j) ++ if (tests_attr_p (XVECEXP (p, i, j), attr)) ++ return 1; ++ break; ++ } ++ } ++ ++ return 0; ++} ++ ++/* Calculate a topological sorting of all attributes so that ++ all attributes only depend on attributes in front of it. ++ Place the result in *RET (which is a pointer to an array of ++ attr_desc pointers), and return the size of that array. 
*/ ++ ++static int ++get_attr_order (struct attr_desc ***ret) ++{ ++ int i, j; ++ int num = 0; ++ struct attr_desc *attr; ++ struct attr_desc **all, **sorted; ++ char *handled; ++ for (i = 0; i < MAX_ATTRS_INDEX; i++) ++ for (attr = attrs[i]; attr; attr = attr->next) ++ num++; ++ all = XNEWVEC (struct attr_desc *, num); ++ sorted = XNEWVEC (struct attr_desc *, num); ++ handled = XCNEWVEC (char, num); ++ num = 0; ++ for (i = 0; i < MAX_ATTRS_INDEX; i++) ++ for (attr = attrs[i]; attr; attr = attr->next) ++ all[num++] = attr; ++ ++ j = 0; ++ for (i = 0; i < num; i++) ++ if (all[i]->is_const) ++ handled[i] = 1, sorted[j++] = all[i]; ++ ++ /* We have only few attributes hence we can live with the inner ++ loop being O(n^2), unlike the normal fast variants of topological ++ sorting. */ ++ while (j < num) ++ { ++ for (i = 0; i < num; i++) ++ if (!handled[i]) ++ { ++ /* Let's see if I depends on anything interesting. */ ++ int k; ++ for (k = 0; k < num; k++) ++ if (!handled[k]) ++ { ++ struct attr_value *av; ++ for (av = all[i]->first_value; av; av = av->next) ++ if (av->num_insns != 0) ++ if (tests_attr_p (av->value, all[k])) ++ break; ++ ++ if (av) ++ /* Something in I depends on K. */ ++ break; ++ } ++ if (k == num) ++ { ++ /* Nothing in I depended on anything intersting, so ++ it's done. 
*/ ++ handled[i] = 1; ++ sorted[j++] = all[i]; ++ } ++ } ++ } ++ ++ if (DEBUG) ++ for (j = 0; j < num; j++) ++ { ++ struct attr_desc *attr2; ++ struct attr_value *av; ++ ++ attr = sorted[j]; ++ fprintf (stderr, "%s depends on: ", attr->name); ++ for (i = 0; i < MAX_ATTRS_INDEX; ++i) ++ for (attr2 = attrs[i]; attr2; attr2 = attr2->next) ++ if (!attr2->is_const) ++ for (av = attr->first_value; av; av = av->next) ++ if (av->num_insns != 0) ++ if (tests_attr_p (av->value, attr2)) ++ { ++ fprintf (stderr, "%s, ", attr2->name); ++ break; ++ } ++ fprintf (stderr, "\n"); ++ } ++ ++ free (all); ++ *ret = sorted; ++ return num; ++} ++ + /* Optimize the attribute lists by seeing if we can determine conditional + values from the known values of other attributes. This will save subroutine + calls during the compilation. */ +@@ -2787,6 +2935,8 @@ optimize_attrs (void) + int i; + struct attr_value_list *ivbuf; + struct attr_value_list *iv; ++ struct attr_desc **topsort; ++ int topnum; + + /* For each insn code, make a list of all the insn_ent's for it, + for all values for all attributes. */ +@@ -2802,18 +2952,22 @@ optimize_attrs (void) + + iv = ivbuf = XNEWVEC (struct attr_value_list, num_insn_ents); + +- for (i = 0; i < MAX_ATTRS_INDEX; i++) +- for (attr = attrs[i]; attr; attr = attr->next) +- for (av = attr->first_value; av; av = av->next) +- for (ie = av->first_insn; ie; ie = ie->next) +- { +- iv->attr = attr; +- iv->av = av; +- iv->ie = ie; +- iv->next = insn_code_values[ie->def->insn_code]; +- insn_code_values[ie->def->insn_code] = iv; +- iv++; +- } ++ /* Create the chain of insn*attr values such that we see dependend ++ attributes after their dependencies. As we use a stack via the ++ next pointers start from the end of the topological order. 
*/ ++ topnum = get_attr_order (&topsort); ++ for (i = topnum - 1; i >= 0; i--) ++ for (av = topsort[i]->first_value; av; av = av->next) ++ for (ie = av->first_insn; ie; ie = ie->next) ++ { ++ iv->attr = topsort[i]; ++ iv->av = av; ++ iv->ie = ie; ++ iv->next = insn_code_values[ie->def->insn_code]; ++ insn_code_values[ie->def->insn_code] = iv; ++ iv++; ++ } ++ free (topsort); + + /* Sanity check on num_insn_ents. */ + gcc_assert (iv == ivbuf + num_insn_ents); +@@ -2848,7 +3002,15 @@ optimize_attrs (void) + } + + rtl_obstack = old; +- if (newexp != av->value) ++ /* If we created a new value for this instruction, and it's ++ cheaper than the old value, and overall cheap, use that ++ one as specific value for the current instruction. ++ The last test is to avoid exploding the get_attr_ function ++ sizes for no much gain. */ ++ if (newexp != av->value ++ && attr_rtx_cost (newexp) < attr_rtx_cost (av->value) ++ && attr_rtx_cost (newexp) < 26 ++ ) + { + newexp = attr_copy_rtx (newexp); + remove_insn_ent (av, ie); +@@ -3944,6 +4106,10 @@ write_attr_set (FILE *outf, struct attr_ + rtx testexp; + rtx inner_true; + ++ /* Reset our_known_true after some time to not accumulate ++ too much cruft (slowing down genattrtab). 
*/ ++ if ((i & 31) == 0) ++ our_known_true = known_true; + testexp = eliminate_known_true (our_known_true, + XVECEXP (value, 0, i), + insn_code, insn_index); diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-system-root.diff gcc-4.6-4.6.4/debian/patches/gcc-system-root.diff --- gcc-4.6-4.6.2/debian/patches/gcc-system-root.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-system-root.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,17 +1,42 @@ -# DP: Avoid include paths starting with a double slash +# DP: Remove trailing slash from system root directory ---- a/src/gcc/configure.ac -+++ b/src/gcc/configure.ac -@@ -733,8 +733,10 @@ - yes) TARGET_SYSTEM_ROOT='${exec_prefix}/${target_noncanonical}/sys-root' ;; - *) TARGET_SYSTEM_ROOT=$with_sysroot ;; - esac -- -- TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' +--- a/src/gcc/incpath.c ++++ b/src/gcc/incpath.c +@@ -172,7 +172,15 @@ + + /* Should this directory start with the sysroot? */ + if (sysroot && p->add_sysroot) +- str = concat (sysroot, p->fname, NULL); ++ { ++ char *sysroot_no_trailing_dir_separator = xstrdup (sysroot); ++ size_t sysroot_len = strlen (sysroot); + -+ if test "x$TARGET_SYSTEM_ROOT" != x/; then -+ TARGET_SYSTEM_ROOT_DEFINE='-DTARGET_SYSTEM_ROOT=\"$(TARGET_SYSTEM_ROOT)\"' -+ fi - CROSS_SYSTEM_HEADER_DIR='$(TARGET_SYSTEM_ROOT)$${sysroot_headers_suffix}$(NATIVE_SYSTEM_HEADER_DIR)' - - if test "x$prefix" = xNONE; then ++ if (sysroot_len > 0 && sysroot[sysroot_len - 1] == DIR_SEPARATOR) ++ sysroot_no_trailing_dir_separator[sysroot_len - 1] = '\0'; ++ str = concat (sysroot_no_trailing_dir_separator, p->fname, NULL); ++ free (sysroot_no_trailing_dir_separator); ++ } + else if (!p->add_sysroot && relocated + && strncmp (p->fname, cpp_PREFIX, cpp_PREFIX_len) == 0) + { +--- a/src/gcc/gcc.c ++++ b/src/gcc/gcc.c +@@ -2440,9 +2440,17 @@ + + if (target_system_root) + { ++ char *sysroot_no_trailing_dir_separator = xstrdup (target_system_root); ++ size_t sysroot_len = 
strlen (target_system_root); ++ ++ if (sysroot_len > 0 ++ && target_system_root[sysroot_len - 1] == DIR_SEPARATOR) ++ sysroot_no_trailing_dir_separator[sysroot_len - 1] = '\0'; ++ + if (target_sysroot_suffix) + prefix = concat (target_sysroot_suffix, prefix, NULL); +- prefix = concat (target_system_root, prefix, NULL); ++ prefix = concat (sysroot_no_trailing_dir_separator, prefix, NULL); ++ free (sysroot_no_trailing_dir_separator); + + /* We have to override this because GCC's notion of sysroot + moves along with GCC. */ diff -Nru gcc-4.6-4.6.2/debian/patches/gcc-textdomain.diff gcc-4.6-4.6.4/debian/patches/gcc-textdomain.diff --- gcc-4.6-4.6.2/debian/patches/gcc-textdomain.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc-textdomain.diff 2013-04-14 23:00:34.000000000 +0000 @@ -15,7 +15,7 @@ open_quote = _("`"); --- a/src/gcc/Makefile.in +++ b/src/gcc/Makefile.in -@@ -5216,8 +5216,8 @@ +@@ -5157,8 +5157,8 @@ dir=$(localedir)/$$lang/LC_MESSAGES; \ echo $(mkinstalldirs) $(DESTDIR)$$dir; \ $(mkinstalldirs) $(DESTDIR)$$dir || exit 1; \ diff -Nru gcc-4.6-4.6.2/debian/patches/gcc_ada_gcc-interface_Makefile.in.diff gcc-4.6-4.6.4/debian/patches/gcc_ada_gcc-interface_Makefile.in.diff --- gcc-4.6-4.6.2/debian/patches/gcc_ada_gcc-interface_Makefile.in.diff 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/gcc_ada_gcc-interface_Makefile.in.diff 2013-04-14 23:00:34.000000000 +0000 @@ -0,0 +1,35 @@ +--- a/src/gcc/ada/gcc-interface/Makefile.in 2011-11-18 12:46:58.000000000 +0100 ++++ b/src/gcc/ada/gcc-interface/Makefile.in 2012-03-28 11:28:09.000000000 +0200 +@@ -1143,6 +1116,32 @@ + LIBRARY_VERSION := $(LIB_VERSION) + endif + ++ifeq ($(strip $(filter-out %86 gnu%,$(arch) $(osys))),) ++ LIBGNAT_TARGET_PAIRS = \ ++ a-intnam.adsrtype->elements[0]->type == FFI_TYPE_STRUCT && ++ cif->rtype->elements[1]) ++ { ++ cif->flags = 0; ++ break; ++ } ++ + switch (cif->rtype->size) + { + case 1: +@@ -163,6 +172,14 @@ + cif->flags = CIF_FLAGS_DINT; 
+ break; + ++ case FFI_TYPE_SINT16: ++ cif->flags = CIF_FLAGS_SINT16; ++ break; ++ ++ case FFI_TYPE_SINT8: ++ cif->flags = CIF_FLAGS_SINT8; ++ break; ++ + default: + cif->flags = CIF_FLAGS_INT; + break; +@@ -261,7 +261,8 @@ + void *user_data, + void *codeloc) + { +- FFI_ASSERT (cif->abi == FFI_SYSV); ++ if (cif->abi != FFI_SYSV) ++ return FFI_BAD_ABI; + + *(unsigned short *)closure->tramp = 0x207c; + *(void **)(closure->tramp + 2) = codeloc; +--- a/src/libffi/src/m68k/ffitarget.h ++++ b/src/libffi/src/m68k/ffitarget.h +@@ -34,8 +34,8 @@ + typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, +- FFI_DEFAULT_ABI = FFI_SYSV, +- FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 ++ FFI_LAST_ABI, ++ FFI_DEFAULT_ABI = FFI_SYSV + } ffi_abi; + #endif + diff -Nru gcc-4.6-4.6.2/debian/patches/libffi-powerpc-sf.diff gcc-4.6-4.6.4/debian/patches/libffi-powerpc-sf.diff --- gcc-4.6-4.6.2/debian/patches/libffi-powerpc-sf.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libffi-powerpc-sf.diff 2013-04-14 23:00:34.000000000 +0000 @@ -19,8 +19,6 @@ src/powerpc/sysv.S | 6 + 4 files changed, 310 insertions(+), 262 deletions(-) -diff --git a/src/powerpc/ffi.c b/src/powerpc/ffi.c -index fb2a39f..e5ec1c5 100644 --- a/src/libffi/src/powerpc/ffi.c +++ b/src/libffi/src/powerpc/ffi.c @@ -39,7 +39,9 @@ @@ -857,11 +855,9 @@ } int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_closure *, void *, -diff --git a/src/powerpc/ffitarget.h b/src/powerpc/ffitarget.h -index d17f731..820c482 100644 --- a/src/libffi/src/powerpc/ffitarget.h +++ b/src/libffi/src/powerpc/ffitarget.h -@@ -60,18 +60,14 @@ typedef enum ffi_abi { +@@ -60,18 +60,14 @@ FFI_LINUX64, FFI_LINUX, FFI_LINUX_SOFT_FLOAT, @@ -885,11 +881,9 @@ # endif #endif -diff --git a/src/powerpc/ppc_closure.S b/src/powerpc/ppc_closure.S -index 56f7d1a..41fb885 100644 --- a/src/libffi/src/powerpc/ppc_closure.S +++ b/src/libffi/src/powerpc/ppc_closure.S -@@ -122,22 +122,41 @@ ENTRY(ffi_closure_SYSV) +@@ -122,22 +122,41 @@ blr # case FFI_TYPE_FLOAT 
@@ -931,11 +925,9 @@ # case FFI_TYPE_UINT8 lbz %r3,112+3(%r1) -diff --git a/src/powerpc/sysv.S b/src/powerpc/sysv.S -index 96ea22b..5ee3a19 100644 --- a/src/libffi/src/powerpc/sysv.S +++ b/src/libffi/src/powerpc/sysv.S -@@ -83,6 +83,7 @@ ENTRY(ffi_call_SYSV) +@@ -83,6 +83,7 @@ nop 1: @@ -943,7 +935,7 @@ /* Load all the FP registers. */ bf- 6,2f lfd %f1,-16-(8*4)-(8*8)(%r28) -@@ -94,6 +95,7 @@ ENTRY(ffi_call_SYSV) +@@ -94,6 +95,7 @@ lfd %f6,-16-(8*4)-(3*8)(%r28) lfd %f7,-16-(8*4)-(2*8)(%r28) lfd %f8,-16-(8*4)-(1*8)(%r28) @@ -951,7 +943,7 @@ 2: /* Make the call. */ -@@ -103,7 +105,9 @@ ENTRY(ffi_call_SYSV) +@@ -103,7 +105,9 @@ mtcrf 0x01,%r31 /* cr7 */ bt- 31,L(small_struct_return_value) bt- 30,L(done_return_value) @@ -961,7 +953,7 @@ stw %r3,0(%r30) bf+ 28,L(done_return_value) stw %r4,4(%r30) -@@ -124,6 +128,7 @@ L(done_return_value): +@@ -124,6 +128,7 @@ lwz %r1,0(%r1) blr @@ -969,7 +961,7 @@ L(fp_return_value): bf 28,L(float_return_value) stfd %f1,0(%r30) -@@ -134,6 +139,7 @@ L(fp_return_value): +@@ -134,6 +139,7 @@ L(float_return_value): stfs %f1,0(%r30) b L(done_return_value) @@ -977,6 +969,3 @@ L(small_struct_return_value): extrwi %r6,%r31,2,19 /* number of bytes padding = shift/8 */ --- -1.7.2.5 - diff -Nru gcc-4.6-4.6.2/debian/patches/libffi-powerpc-sysv-without-string-ops.diff gcc-4.6-4.6.4/debian/patches/libffi-powerpc-sysv-without-string-ops.diff --- gcc-4.6-4.6.2/debian/patches/libffi-powerpc-sysv-without-string-ops.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libffi-powerpc-sysv-without-string-ops.diff 2013-04-14 23:00:34.000000000 +0000 @@ -1,6 +1,6 @@ --- a/src/libffi/src/powerpc/ffi.c +++ b/src/libffi/src/powerpc/ffi.c -@@ -46,11 +46,6 @@ +@@ -45,11 +45,6 @@ FLAG_RETURNS_64BITS = 1 << (31-28), FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */ @@ -12,7 +12,7 @@ FLAG_ARG_NEEDS_COPY = 1 << (31- 7), #ifndef __NO_FPRS__ -@@ -688,37 +683,22 @@ +@@ -687,37 +682,22 @@ break; case FFI_TYPE_STRUCT: @@ -65,7 +65,7 @@ case FFI_TYPE_VOID: 
flags |= FLAG_RETURNS_NOTHING; break; -@@ -932,21 +912,30 @@ +@@ -931,21 +911,30 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) { @@ -106,7 +106,7 @@ switch (cif->abi) { -@@ -968,6 +957,10 @@ +@@ -967,6 +956,10 @@ FFI_ASSERT (0); break; } diff -Nru gcc-4.6-4.6.2/debian/patches/libgomp-kfreebsd-testsuite.diff gcc-4.6-4.6.4/debian/patches/libgomp-kfreebsd-testsuite.diff --- gcc-4.6-4.6.2/debian/patches/libgomp-kfreebsd-testsuite.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libgomp-kfreebsd-testsuite.diff 2013-04-14 23:00:34.000000000 +0000 @@ -12,4 +12,3 @@ int l = 0; omp_nest_lock_t lock; omp_init_nest_lock (&lock); - diff -Nru gcc-4.6-4.6.2/debian/patches/libjava-disable-static.diff gcc-4.6-4.6.4/debian/patches/libjava-disable-static.diff --- gcc-4.6-4.6.2/debian/patches/libjava-disable-static.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libjava-disable-static.diff 2013-04-14 23:00:34.000000000 +0000 @@ -6,7 +6,7 @@ --- a/src/Makefile.in +++ b/src/Makefile.in -@@ -54431,7 +54431,7 @@ +@@ -53956,7 +53956,7 @@ rm -f no-such-file || : ; \ CONFIG_SITE=no-such-file $(SHELL) $${libsrcdir}/configure \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ @@ -15,7 +15,7 @@ || exit 1 @endif target-libjava -@@ -55345,7 +55345,7 @@ +@@ -54870,7 +54870,7 @@ rm -f no-such-file || : ; \ CONFIG_SITE=no-such-file $(SHELL) $${libsrcdir}/configure \ $(TARGET_CONFIGARGS) --build=${build_alias} --host=${target_alias} \ diff -Nru gcc-4.6-4.6.2/debian/patches/libjava-fixed-symlinks.diff gcc-4.6-4.6.4/debian/patches/libjava-fixed-symlinks.diff --- gcc-4.6-4.6.2/debian/patches/libjava-fixed-symlinks.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libjava-fixed-symlinks.diff 2013-04-14 23:00:34.000000000 +0000 @@ -13,7 +13,7 @@ ln -sf $$RELATIVE/`echo gij | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'` \ --- a/src/libjava/Makefile.in +++ b/src/libjava/Makefile.in 
-@@ -12460,7 +12460,7 @@ +@@ -12461,7 +12461,7 @@ @CREATE_JAVA_HOME_TRUE@ $(mkinstalldirs) $(DESTDIR)$(SDK_INCLUDE_DIR)/$(OS) @CREATE_JAVA_HOME_TRUE@ relative() { \ @CREATE_JAVA_HOME_TRUE@ $(PERL) -e 'use File::Spec; \ diff -Nru gcc-4.6-4.6.2/debian/patches/libjava-jnipath.diff gcc-4.6-4.6.4/debian/patches/libjava-jnipath.diff --- gcc-4.6-4.6.2/debian/patches/libjava-jnipath.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libjava-jnipath.diff 2013-04-14 23:00:34.000000000 +0000 @@ -4,7 +4,7 @@ --- a/src/libjava/configure.ac +++ b/src/libjava/configure.ac -@@ -1488,6 +1488,9 @@ +@@ -1533,6 +1533,9 @@ AC_C_BIGENDIAN @@ -15,8 +15,8 @@ SYS_ZLIBS= ZINCS= --- a/src/libjava/Makefile.am -+++ a/src/libjava/Makefile.am -@@ -324,6 +324,7 @@ ++++ b/src/libjava/Makefile.am +@@ -362,6 +362,7 @@ $(WARNINGS) \ -D_GNU_SOURCE \ -DPREFIX="\"$(prefix)\"" \ @@ -26,7 +26,7 @@ -DBOOT_CLASS_PATH="\"$(BOOT_CLASS_PATH_DIR)\"" \ --- a/src/libjava/Makefile.in +++ b/src/libjava/Makefile.in -@@ -744,6 +744,7 @@ +@@ -626,6 +626,7 @@ MAKE = @MAKE@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ @@ -34,7 +34,7 @@ NM = nm NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ -@@ -1094,6 +1095,7 @@ +@@ -1008,6 +1009,7 @@ $(WARNINGS) \ -D_GNU_SOURCE \ -DPREFIX="\"$(prefix)\"" \ diff -Nru gcc-4.6-4.6.2/debian/patches/libjava-multiarch.diff gcc-4.6-4.6.4/debian/patches/libjava-multiarch.diff --- gcc-4.6-4.6.2/debian/patches/libjava-multiarch.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libjava-multiarch.diff 2013-04-14 23:00:34.000000000 +0000 @@ -2,8 +2,8 @@ --- a/src/libjava/configure.ac +++ b/src/libjava/configure.ac -@@ -1592,6 +1592,10 @@ - ../lib*) toolexeclibdir='$(subst /lib/../lib,/lib,'$toolexecmainlibdir/$multi_os_directory')' ;; +@@ -1593,6 +1593,10 @@ + .) toolexeclibdir=$toolexecmainlibdir ;; # Avoid trailing /. 
*) toolexeclibdir=$toolexecmainlibdir/$multi_os_directory ;; esac + multiarch=`$CC -print-multiarch` @@ -26,7 +26,7 @@ --- a/src/libjava/Makefile.am +++ b/src/libjava/Makefile.am -@@ -372,7 +372,7 @@ +@@ -371,7 +371,7 @@ -DGCJ_VERSIONED_LIBDIR="\"$(dbexecdir)\"" \ -DPATH_SEPARATOR="\"$(CLASSPATH_SEPARATOR)\"" \ -DECJ_JAR_FILE="\"$(ECJ_JAR)\"" \ @@ -37,7 +37,7 @@ AM_GCJFLAGS = \ --- a/src/libjava/Makefile.in +++ b/src/libjava/Makefile.in -@@ -1019,7 +1019,7 @@ +@@ -1018,7 +1018,7 @@ -DGCJ_VERSIONED_LIBDIR="\"$(dbexecdir)\"" \ -DPATH_SEPARATOR="\"$(CLASSPATH_SEPARATOR)\"" \ -DECJ_JAR_FILE="\"$(ECJ_JAR)\"" \ diff -Nru gcc-4.6-4.6.2/debian/patches/libjava-nobiarch-check.diff gcc-4.6-4.6.4/debian/patches/libjava-nobiarch-check.diff --- gcc-4.6-4.6.2/debian/patches/libjava-nobiarch-check.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libjava-nobiarch-check.diff 2013-04-14 23:00:34.000000000 +0000 @@ -7,7 +7,7 @@ --- a/src/libjava/testsuite/Makefile.in +++ b/src/libjava/testsuite/Makefile.in -@@ -381,12 +381,14 @@ +@@ -380,12 +380,14 @@ check-DEJAGNU: site.exp diff -Nru gcc-4.6-4.6.2/debian/patches/libstdc++-doclink.diff gcc-4.6-4.6.4/debian/patches/libstdc++-doclink.diff --- gcc-4.6-4.6.2/debian/patches/libstdc++-doclink.diff 2013-04-14 23:00:32.000000000 +0000 +++ gcc-4.6-4.6.4/debian/patches/libstdc++-doclink.diff 2013-04-14 23:00:34.000000000 +0000 @@ -40,7 +40,7 @@ +

The API documentation, rendered into HTML, can be viewed online: