--- dragonegg-2.8.orig/Makefile +++ dragonegg-2.8/Makefile @@ -91,8 +91,7 @@ @echo Linking $@ $(QUIET)$(CXX) -o $@ $(LOADABLE_MODULE_OPTIONS) $(CXXFLAGS) \ $(LD_OPTIONS) $(PLUGIN_OBJECTS) $(TARGET_OBJECT) \ - $(shell $(LLVM_CONFIG) --libs analysis core ipo scalaropts target \ - $(shell $(TARGET_UTIL) -p)) + -lLLVM-2.8 clean:: $(QUIET)rm -f *.o *.d $(PLUGIN) $(TARGET_UTIL) --- dragonegg-2.8.orig/debian/gcc-wrap.in +++ dragonegg-2.8/debian/gcc-wrap.in @@ -0,0 +1,2 @@ +#!/bin/sh +exec ${0#/usr/bin/llvm-}-4.5 -fplugin=@plugindir@/dragonegg.so "$@" --- dragonegg-2.8.orig/debian/rules +++ dragonegg-2.8/debian/rules @@ -0,0 +1,42 @@ +#!/usr/bin/make -f + +deb_version := $(shell dpkg-parsechangelog | sed -ne "s/^Version: \(.*\)/\1/p") +upstream_version := $(shell echo $(deb_version) | sed -e "s/-[^-]*$$//") +major := $(shell echo $(upstream_version) | sed -e "s/\([0-9]\+\.[0-9]\+\)[\.-].*/\1/g") + +plugindir := $(shell gcc-4.5 -print-file-name=plugin | sed 's/4\.\([5-9]\)\../4.\1/') + +include /usr/share/cdbs/1/rules/debhelper.mk +include /usr/share/cdbs/1/rules/simple-patchsys.mk + +stamps/build: apply-patches + # link with llvm-snapshot + LDFLAGS=-L/usr/lib/llvm-2.8/lib $(MAKE) \ + VERBOSE=1 \ + LLVM_CONFIG=/usr/lib/llvm-2.8/bin/llvm-config \ + -C $(DEB_SRCDIR) + mkdir -p stamps + touch $@ + +stamps/install: + mkdir -p debian/dragonegg/$(plugindir) + install -m755 $(DEB_SRCDIR)/dragonegg.so debian/dragonegg/$(plugindir)/ + mkdir -p stamps + touch $@ + +stamps/install-llvm-gcc-4.5: + mkdir -p debian/llvm-gcc-4.5/usr/bin + sed 's,@plugindir@,$(plugindir),' debian/gcc-wrap.in > debian/gcc-wrap + for i in c++ cpp g++ gcc gccbug gcov ; do \ + install -m755 debian/gcc-wrap debian/llvm-gcc-4.5/usr/bin/llvm-$$i ; \ + done + +build/dragonegg:: stamps/build +install/dragonegg:: stamps/install + +install/llvm-gcc-4.5:: stamps/install-llvm-gcc-4.5 + +clean:: + $(MAKE) clean VERBOSE=1 + rm -f debian/gcc-wrap + rm -rf stamps --- dragonegg-2.8.orig/debian/copyright +++ dragonegg-2.8/debian/copyright @@ -0,0 +1,50 @@ +Name: DragonEgg +Source: http://dragonegg.llvm.org/ +Maintainer: Duncan Sands + +Files: * +Copyright: 2009 Duncan Sands +License: GPL-2+ + +Files: llvm-backend.cpp + llvm-types.cpp + llvm-internal.h + linux/llvm-os.h + llvm-convert.cpp + llvm-cache.h + darwin/llvm-os.h + gt-llvm-cache.h + llvm-debug.cpp + llvm-cache.c + llvm-debug.h + x86/llvm-target.h + x86/llvm-target.cpp + llvm-abi.h +Copyright: 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. +License: GPL-2+ + +Files: debian/* +Copyright: 2009, Robert Millan +License: GPL-2+ + +Files: debian/patches/02_missing_gcc_headers.diff +Copyright: 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2007, 2008, 2009 Free Software Foundation, Inc. +License: GPL-3+ + +Files: debian/patches/05_gcc_i386.diff +Copyright: 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 + Free Software Foundation, Inc. +License: GPL-3+ + + +License: GPL-2+ + On Debian systems the full text of the GNU General Public + License can be found in the `/usr/share/common-licenses/GPL-2' + file. + +License: GPL-3+ + On Debian systems the full text of the GNU General Public + License can be found in the `/usr/share/common-licenses/GPL' + file. --- dragonegg-2.8.orig/debian/compat +++ dragonegg-2.8/debian/compat @@ -0,0 +1 @@ +7 --- dragonegg-2.8.orig/debian/changelog +++ dragonegg-2.8/debian/changelog @@ -0,0 +1,65 @@ +dragonegg (2.8-0ubuntu2) natty; urgency=low + + * Rebuild for GCC multiarch locations. 
+ + -- Matthias Klose Sun, 03 Apr 2011 11:53:59 +0200 + +dragonegg (2.8-0ubuntu1) maverick-proposed; urgency=low + + * Final 2.8 release. LP: #632727. + + -- Matthias Klose Thu, 14 Oct 2010 18:50:58 +0200 + +dragonegg (2.8~20101006-0ubuntu1) maverick; urgency=low + + * New upstream version, taken from the trunk. + + -- Matthias Klose Wed, 06 Oct 2010 16:05:42 +0200 + +dragonegg (2.8~20100921-0ubuntu1) maverick; urgency=low + + * New upstream version, taken from the trunk. + + -- Matthias Klose Tue, 21 Sep 2010 22:39:41 +0200 + +dragonegg (2.8~20100911-0ubuntu1) maverick; urgency=low + + * New upstream version, taken from the trunk. + + -- Matthias Klose Mon, 13 Sep 2010 10:37:04 +0200 + +dragonegg (2.8~20100907-0ubuntu1) maverick; urgency=low + + * New upstream version, taken from the trunk. + + -- Matthias Klose Tue, 07 Sep 2010 17:24:30 +0200 + +dragonegg (2.7-0ubuntu2) maverick; urgency=low + + * Build-depend on llvm-2.7-dev. + + -- Matthias Klose Wed, 02 Jun 2010 00:22:09 +0200 + +dragonegg (2.7-0ubuntu1) maverick; urgency=low + + * Upload to maverick. + + -- Matthias Klose Fri, 21 May 2010 13:22:45 +0200 + +dragonegg (2.7-0ubuntu1~ppa3) lucid; urgency=low + + * New upstream version. + + -- Matthias Klose Tue, 27 Apr 2010 15:09:49 +0200 + +dragonegg (0~20091229-2) experimental; urgency=low + + * control (llvm-gcc-4.5): Provide and conflict with llvm-gcc. + + -- Robert Millan Mon, 04 Jan 2010 13:15:08 +0100 + +dragonegg (0~20091229-1) experimental; urgency=low + + * Initial release. (Closes: #563315) + + -- Robert Millan Fri, 01 Jan 2010 22:55:02 +0100 --- dragonegg-2.8.orig/debian/control +++ dragonegg-2.8/debian/control @@ -0,0 +1,42 @@ +Source: dragonegg +Section: devel +Priority: optional +Maintainer: Robert Millan +Uploaders: Arthur Loiret +Build-Depends: + cdbs, + debhelper (>= 7), + gcc-4.5-plugin-dev, + llvm-2.8-dev (>= 2.8), + libffi-dev, + libmpfr-dev, + libmpc-dev, +Standards-Version: 3.7.3 +Homepage: http://dragonegg.llvm.org/ + +Package: dragonegg +Architecture: i386 kfreebsd-i386 hurd-i386 kopensolaris-i386 amd64 kfreebsd-amd64 lpia +Depends: ${shlibs:Depends}, ${misc:Depends} +Recommends: gcc-4.5 | g++-4.5 +Description: GCC plugin that uses LLVM for optimization and code generation + DragonEgg is a GCC plugin (dragonegg.so) that replaces GCC's optimizers + and code generators with those from the LLVM project. + . + It is a reimplementation of llvm-gcc that works with gcc-4.5 or later. + . + DragonEgg is under heavy development and is not mature - it may crash or + produce wrong code. + +Package: llvm-gcc-4.5 +Architecture: i386 kfreebsd-i386 hurd-i386 kopensolaris-i386 amd64 kfreebsd-amd64 lpia +Depends: ${shlibs:Depends}, ${misc:Depends}, dragonegg, gcc-4.5, g++-4.5 +Provides: llvm-gcc +Conflicts: llvm-gcc +Description: C front end for LLVM C/C++ compiler + The Low-Level Virtual Machine (LLVM) is a collection of libraries and + tools that make it easy to build compilers, optimizers, Just-In-Time + code generators, and many other compiler-related programs. + . + This is the DragonEgg-based version of llvm-gcc. Note that DragonEgg + is under heavy development and is not mature - it may crash or + produce wrong code. 
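
Illustrative sketch (an editorial annotation; everything below restates debian/gcc-wrap.in and debian/rules shown earlier): the llvm-gcc-4.5 wrappers installed by debian/rules are all copies of the same two-line script; each copy strips the /usr/bin/llvm- prefix from the name it was invoked under and re-executes the matching GCC 4.5 tool with the dragonegg plugin loaded. An annotated version of debian/gcc-wrap.in:

    #!/bin/sh
    # @plugindir@ is filled in by debian/rules with the directory printed by
    # `gcc-4.5 -print-file-name=plugin` (with the micro version stripped by sed).
    # When this copy is installed as /usr/bin/llvm-gcc, ${0#/usr/bin/llvm-}
    # expands to "gcc", so the exec below becomes:
    #   gcc-4.5 -fplugin=@plugindir@/dragonegg.so "$@"
    exec ${0#/usr/bin/llvm-}-4.5 -fplugin=@plugindir@/dragonegg.so "$@"

So running, for example, llvm-gcc -O2 hello.c invokes the gcc-4.5 front end while LLVM performs optimization and code generation, as described in the dragonegg package description above.
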
--- dragonegg-2.8.orig/debian/patches/06_no_llvm-os_header.diff +++ dragonegg-2.8/debian/patches/06_no_llvm-os_header.diff @@ -0,0 +1,11 @@ +--- ./llvm-backend.cpp~ 2010-09-07 17:20:53.000000000 +0200 ++++ ./llvm-backend.cpp 2010-09-07 17:30:35.470942297 +0200 +@@ -25,7 +25,7 @@ + } + #include "llvm-debug.h" + #include "llvm-internal.h" +-#include "llvm-os.h" ++// #include "llvm-os.h" + #include "llvm-target.h" + + // LLVM headers --- dragonegg-2.8.orig/debian/patches/04_hasnestattr.diff +++ dragonegg-2.8/debian/patches/04_hasnestattr.diff @@ -0,0 +1,11 @@ +--- dragonegg-0~20091229/llvm-backend.cpp~ 2009-12-29 20:32:56.000000000 +0100 ++++ dragonegg-0~20091229/llvm-backend.cpp 2009-12-29 21:08:06.000000000 +0100 +@@ -1644,7 +1644,7 @@ + // additional artificial arguments for doing struct return or passing a + // nested function static chain. Look for 'this' while passing through + // all arguments except for 'this' unchanged. +- if (FoundThis || AI->hasStructRetAttr() || AI->hasNestAttr()) { ++ if (FoundThis || AI->hasStructRetAttr()) { + Arguments.push_back(AI); + continue; + } --- dragonegg-2.8.orig/debian/patches/05_gcc_i386.diff +++ dragonegg-2.8/debian/patches/05_gcc_i386.diff @@ -0,0 +1,2351 @@ + +Temporary replacement for [dragonegg]/gcc-patches/i386_static.diff. Upstream +(Duncan) said he has plans for a proper solution other than merging +i386_static.diff. This patch should be removed as soon as this solution is +available. + +--- Makefile~ 2010-04-20 09:25:47.000000000 +0200 ++++ Makefile 2010-04-27 15:55:19.464836030 +0200 +@@ -34,7 +34,7 @@ + + PLUGIN=dragonegg.so + PLUGIN_OBJECTS=llvm-cache.o llvm-convert.o llvm-backend.o llvm-debug.o \ +- llvm-types.o bits_and_bobs.o llvm-abi-default.o ++ llvm-types.o bits_and_bobs.o llvm-abi-default.o gcc-i386.o + + TARGET_OBJECT=llvm-target.o + TARGET_SOURCE=$(SRC_DIR)/$(shell $(TARGET_UTIL) -p)/llvm-target.cpp +--- gcc-i386.c 1970-01-01 00:00:00 +0000 ++++ gcc-i386.c 2009-12-29 12:28:35 +0000 +@@ -0,0 +1,2330 @@ ++ ++/* ++ * Derived from [gcc]/gcc/config/i386/i386.c ++ * (pre-4.5 snapshot taken on 20091223) ++ */ ++ ++ ++/* Subroutines used for code generation on IA-32. ++ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, ++ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 ++ Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "rtl.h" ++#include "tree.h" ++#include "tm_p.h" ++#include "hard-reg-set.h" ++#include "real.h" ++#include "output.h" ++#include "flags.h" ++#include "except.h" ++#include "function.h" ++#include "toplev.h" ++#include "basic-block.h" ++#include "ggc.h" ++#include "target.h" ++#include "langhooks.h" ++#include "cgraph.h" ++#include "gimple.h" ++#include "params.h" ++ ++#ifndef CHECK_STACK_LIMIT ++#define CHECK_STACK_LIMIT (-1) ++#endif ++ ++/* Return index of given mode in mult and division cost tables. 
*/ ++#define MODE_INDEX(mode) \ ++ ((mode) == QImode ? 0 \ ++ : (mode) == HImode ? 1 \ ++ : (mode) == SImode ? 2 \ ++ : (mode) == DImode ? 3 \ ++ : 4) ++ ++/* Processor costs (relative to an add) */ ++/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ ++#define COSTS_N_BYTES(N) ((N) * 2) ++ ++#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}} ++ ++const ++struct processor_costs ix86_size_cost = {/* costs for tuning for size */ ++ COSTS_N_BYTES (2), /* cost of an add instruction */ ++ COSTS_N_BYTES (3), /* cost of a lea instruction */ ++ COSTS_N_BYTES (2), /* variable shift costs */ ++ COSTS_N_BYTES (3), /* constant shift costs */ ++ {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ ++ COSTS_N_BYTES (3), /* HI */ ++ COSTS_N_BYTES (3), /* SI */ ++ COSTS_N_BYTES (3), /* DI */ ++ COSTS_N_BYTES (5)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ ++ COSTS_N_BYTES (3), /* HI */ ++ COSTS_N_BYTES (3), /* SI */ ++ COSTS_N_BYTES (3), /* DI */ ++ COSTS_N_BYTES (5)}, /* other */ ++ COSTS_N_BYTES (3), /* cost of movsx */ ++ COSTS_N_BYTES (3), /* cost of movzx */ ++ 0, /* "large" insn */ ++ 2, /* MOVE_RATIO */ ++ 2, /* cost for loading QImode using movzbl */ ++ {2, 2, 2}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {2, 2, 2}, /* cost of storing integer registers */ ++ 2, /* cost of reg,reg fld/fst */ ++ {2, 2, 2}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {2, 2, 2}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 3, /* cost of moving MMX register */ ++ {3, 3}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {3, 3}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 3, /* cost of moving SSE register */ ++ {3, 3, 3}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {3, 3, 3}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 3, /* MMX or SSE register to integer */ ++ 0, /* size of l1 cache */ ++ 0, /* size of l2 cache */ ++ 0, /* size of prefetch block */ ++ 0, /* number of parallel prefetches */ ++ 2, /* Branch cost */ ++ COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ ++ COSTS_N_BYTES (2), /* cost of FMUL instruction. */ ++ COSTS_N_BYTES (2), /* cost of FDIV instruction. */ ++ COSTS_N_BYTES (2), /* cost of FABS instruction. */ ++ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ ++ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ ++ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, ++ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}, ++ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, ++ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 1, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 1, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++/* Processor costs (relative to an add) */ ++static const ++struct processor_costs i386_cost = { /* 386 specific costs */ ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction */ ++ COSTS_N_INSNS (3), /* variable shift costs */ ++ COSTS_N_INSNS (2), /* constant shift costs */ ++ {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (6), /* HI */ ++ COSTS_N_INSNS (6), /* SI */ ++ COSTS_N_INSNS (6), /* DI */ ++ COSTS_N_INSNS (6)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (23), /* HI */ ++ COSTS_N_INSNS (23), /* SI */ ++ COSTS_N_INSNS (23), /* DI */ ++ COSTS_N_INSNS (23)}, /* other */ ++ COSTS_N_INSNS (3), /* cost of movsx */ ++ COSTS_N_INSNS (2), /* cost of movzx */ ++ 15, /* "large" insn */ ++ 3, /* MOVE_RATIO */ ++ 4, /* cost for loading QImode using movzbl */ ++ {2, 4, 2}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {2, 4, 2}, /* cost of storing integer registers */ ++ 2, /* cost of reg,reg fld/fst */ ++ {8, 8, 8}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {8, 8, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {4, 8}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {4, 8}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {4, 8, 16}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {4, 8, 16}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 3, /* MMX or SSE register to integer */ ++ 0, /* size of l1 cache */ ++ 0, /* size of l2 cache */ ++ 0, /* size of prefetch block */ ++ 0, /* number of parallel prefetches */ ++ 1, /* Branch cost */ ++ COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (27), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (88), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (22), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (24), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ ++ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs i486_cost = { /* 486 specific costs */ ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction */ ++ COSTS_N_INSNS (3), /* variable shift costs */ ++ COSTS_N_INSNS (2), /* constant shift costs */ ++ {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (12), /* HI */ ++ COSTS_N_INSNS (12), /* SI */ ++ COSTS_N_INSNS (12), /* DI */ ++ COSTS_N_INSNS (12)}, /* other */ ++ 1, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (40), /* HI */ ++ COSTS_N_INSNS (40), /* SI */ ++ COSTS_N_INSNS (40), /* DI */ ++ COSTS_N_INSNS (40)}, /* other */ ++ COSTS_N_INSNS (3), /* cost of movsx */ ++ COSTS_N_INSNS (2), /* cost of movzx */ ++ 15, /* "large" insn */ ++ 3, /* MOVE_RATIO */ ++ 4, /* cost for loading QImode using movzbl */ ++ {2, 4, 2}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {2, 4, 2}, /* cost of storing integer registers */ ++ 2, /* cost of reg,reg fld/fst */ ++ {8, 8, 8}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {8, 8, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {4, 8}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {4, 8}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {4, 8, 16}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {4, 8, 16}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 3, /* MMX or SSE register to integer */ ++ 4, /* size of l1 cache. 486 has 8kB cache ++ shared for code and data, so 4kB is ++ not really precise. */ ++ 4, /* size of l2 cache */ ++ 0, /* size of prefetch block */ ++ 0, /* number of parallel prefetches */ ++ 1, /* Branch cost */ ++ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (16), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (73), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (3), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (3), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ ++ {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs pentium_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction */ ++ COSTS_N_INSNS (4), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (11), /* HI */ ++ COSTS_N_INSNS (11), /* SI */ ++ COSTS_N_INSNS (11), /* DI */ ++ COSTS_N_INSNS (11)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (25), /* HI */ ++ COSTS_N_INSNS (25), /* SI */ ++ COSTS_N_INSNS (25), /* DI */ ++ COSTS_N_INSNS (25)}, /* other */ ++ COSTS_N_INSNS (3), /* cost of movsx */ ++ COSTS_N_INSNS (2), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 6, /* MOVE_RATIO */ ++ 6, /* cost for loading QImode using movzbl */ ++ {2, 4, 2}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {2, 4, 2}, /* cost of storing integer registers */ ++ 2, /* cost of reg,reg fld/fst */ ++ {2, 2, 6}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {4, 4, 6}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 8, /* cost of moving MMX register */ ++ {8, 8}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {4, 8, 16}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {4, 8, 16}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 3, /* MMX or SSE register to integer */ ++ 8, /* size of l1 cache. */ ++ 8, /* size of l2 cache */ ++ 0, /* size of prefetch block */ ++ 0, /* number of parallel prefetches */ ++ 2, /* Branch cost */ ++ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (3), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (39), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ ++ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{libcall, {{-1, rep_prefix_4_byte}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs pentiumpro_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (4), /* HI */ ++ COSTS_N_INSNS (4), /* SI */ ++ COSTS_N_INSNS (4), /* DI */ ++ COSTS_N_INSNS (4)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (17), /* HI */ ++ COSTS_N_INSNS (17), /* SI */ ++ COSTS_N_INSNS (17), /* DI */ ++ COSTS_N_INSNS (17)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 6, /* MOVE_RATIO */ ++ 2, /* cost for loading QImode using movzbl */ ++ {4, 4, 4}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {2, 2, 2}, /* cost of storing integer registers */ ++ 2, /* cost of reg,reg fld/fst */ ++ {2, 2, 6}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {4, 4, 6}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {2, 2}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {2, 2}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {2, 2, 8}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {2, 2, 8}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 3, /* MMX or SSE register to integer */ ++ 8, /* size of l1 cache. */ ++ 256, /* size of l2 cache */ ++ 32, /* size of prefetch block */ ++ 6, /* number of parallel prefetches */ ++ 2, /* Branch cost */ ++ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (5), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (56), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (2), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ ++ /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure ++ the alignment). For small blocks inline loop is still a noticeable win, for bigger ++ blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently ++ more expensive startup time in CPU, but after 4K the difference is down in the noise. ++ */ ++ {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop}, ++ {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{rep_prefix_4_byte, {{1024, unrolled_loop}, ++ {8192, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs geode_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction */ ++ COSTS_N_INSNS (2), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (4), /* HI */ ++ COSTS_N_INSNS (7), /* SI */ ++ COSTS_N_INSNS (7), /* DI */ ++ COSTS_N_INSNS (7)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (23), /* HI */ ++ COSTS_N_INSNS (39), /* SI */ ++ COSTS_N_INSNS (39), /* DI */ ++ COSTS_N_INSNS (39)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 4, /* MOVE_RATIO */ ++ 1, /* cost for loading QImode using movzbl */ ++ {1, 1, 1}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {1, 1, 1}, /* cost of storing integer registers */ ++ 1, /* cost of reg,reg fld/fst */ ++ {1, 1, 1}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {4, 6, 6}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ ++ 1, /* cost of moving MMX register */ ++ {1, 1}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {1, 1}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 1, /* cost of moving SSE register */ ++ {1, 1, 1}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {1, 1, 1}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 1, /* MMX or SSE register to integer */ ++ 64, /* size of l1 cache. */ ++ 128, /* size of l2 cache. */ ++ 32, /* size of prefetch block */ ++ 1, /* number of parallel prefetches */ ++ 1, /* Branch cost */ ++ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (11), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (47), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ ++ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs k6_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (2), /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (3), /* HI */ ++ COSTS_N_INSNS (3), /* SI */ ++ COSTS_N_INSNS (3), /* DI */ ++ COSTS_N_INSNS (3)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (18), /* HI */ ++ COSTS_N_INSNS (18), /* SI */ ++ COSTS_N_INSNS (18), /* DI */ ++ COSTS_N_INSNS (18)}, /* other */ ++ COSTS_N_INSNS (2), /* cost of movsx */ ++ COSTS_N_INSNS (2), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 4, /* MOVE_RATIO */ ++ 3, /* cost for loading QImode using movzbl */ ++ {4, 5, 4}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {2, 3, 2}, /* cost of storing integer registers */ ++ 4, /* cost of reg,reg fld/fst */ ++ {6, 6, 6}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {4, 4, 4}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {2, 2}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {2, 2}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {2, 2, 8}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {2, 2, 8}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 6, /* MMX or SSE register to integer */ ++ 32, /* size of l1 cache. */ ++ 32, /* size of l2 cache. Some models ++ have integrated l2 cache, but ++ optimizing for k6 is not important ++ enough to worry about that. */ ++ 32, /* size of prefetch block */ ++ 1, /* number of parallel prefetches */ ++ 1, /* Branch cost */ ++ COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (2), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (56), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (2), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ ++ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs athlon_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (2), /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (5), /* HI */ ++ COSTS_N_INSNS (5), /* SI */ ++ COSTS_N_INSNS (5), /* DI */ ++ COSTS_N_INSNS (5)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (26), /* HI */ ++ COSTS_N_INSNS (42), /* SI */ ++ COSTS_N_INSNS (74), /* DI */ ++ COSTS_N_INSNS (74)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 9, /* MOVE_RATIO */ ++ 4, /* cost for loading QImode using movzbl */ ++ {3, 4, 3}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {3, 4, 3}, /* cost of storing integer registers */ ++ 4, /* cost of reg,reg fld/fst */ ++ {4, 4, 12}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {6, 6, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {4, 4}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {4, 4}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {4, 4, 6}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {4, 4, 5}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 5, /* MMX or SSE register to integer */ ++ 64, /* size of l1 cache. */ ++ 256, /* size of l2 cache. */ ++ 64, /* size of prefetch block */ ++ 6, /* number of parallel prefetches */ ++ 5, /* Branch cost */ ++ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (4), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (24), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (2), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ ++ /* For some reason, Athlon deals better with REP prefix (relative to loops) ++ compared to K8. Alignment becomes important after 8 bytes for memcpy and ++ 128 bytes for memset. */ ++ {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs k8_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (2), /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (4), /* HI */ ++ COSTS_N_INSNS (3), /* SI */ ++ COSTS_N_INSNS (4), /* DI */ ++ COSTS_N_INSNS (5)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (26), /* HI */ ++ COSTS_N_INSNS (42), /* SI */ ++ COSTS_N_INSNS (74), /* DI */ ++ COSTS_N_INSNS (74)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 9, /* MOVE_RATIO */ ++ 4, /* cost for loading QImode using movzbl */ ++ {3, 4, 3}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {3, 4, 3}, /* cost of storing integer registers */ ++ 4, /* cost of reg,reg fld/fst */ ++ {4, 4, 12}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {6, 6, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {3, 3}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {4, 4}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {4, 3, 6}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {4, 4, 5}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 5, /* MMX or SSE register to integer */ ++ 64, /* size of l1 cache. */ ++ 512, /* size of l2 cache. */ ++ 64, /* size of prefetch block */ ++ /* New AMD processors never drop prefetches; if they cannot be performed ++ immediately, they are queued. We set number of simultaneous prefetches ++ to a large constant to reflect this (it probably is not a good idea not ++ to limit number of prefetches at all, as their execution also takes some ++ time). */ ++ 100, /* number of parallel prefetches */ ++ 3, /* Branch cost */ ++ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (4), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (19), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (2), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ ++ /* K8 has optimized REP instruction for medium sized blocks, but for very small ++ blocks it is better to use loop. For large blocks, libcall can do ++ nontemporary accesses and beat inline considerably. */ ++ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}, ++ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ {{libcall, {{8, loop}, {24, unrolled_loop}, ++ {2048, rep_prefix_4_byte}, {-1, libcall}}}, ++ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ 4, /* scalar_stmt_cost. */ ++ 2, /* scalar load_cost. */ ++ 2, /* scalar_store_cost. */ ++ 5, /* vec_stmt_cost. */ ++ 0, /* vec_to_scalar_cost. */ ++ 2, /* scalar_to_vec_cost. */ ++ 2, /* vec_align_load_cost. */ ++ 3, /* vec_unalign_load_cost. */ ++ 3, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 2, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++struct processor_costs amdfam10_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (2), /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (4), /* HI */ ++ COSTS_N_INSNS (3), /* SI */ ++ COSTS_N_INSNS (4), /* DI */ ++ COSTS_N_INSNS (5)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (35), /* HI */ ++ COSTS_N_INSNS (51), /* SI */ ++ COSTS_N_INSNS (83), /* DI */ ++ COSTS_N_INSNS (83)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 9, /* MOVE_RATIO */ ++ 4, /* cost for loading QImode using movzbl */ ++ {3, 4, 3}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {3, 4, 3}, /* cost of storing integer registers */ ++ 4, /* cost of reg,reg fld/fst */ ++ {4, 4, 12}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {6, 6, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {3, 3}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {4, 4}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {4, 4, 3}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {4, 4, 5}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 3, /* MMX or SSE register to integer */ ++ /* On K8 ++ MOVD reg64, xmmreg Double FSTORE 4 ++ MOVD reg32, xmmreg Double FSTORE 4 ++ On AMDFAM10 ++ MOVD reg64, xmmreg Double FADD 3 ++ 1/1 1/1 ++ MOVD reg32, xmmreg Double FADD 3 ++ 1/1 1/1 */ ++ 64, /* size of l1 cache. */ ++ 512, /* size of l2 cache. */ ++ 64, /* size of prefetch block */ ++ /* New AMD processors never drop prefetches; if they cannot be performed ++ immediately, they are queued. We set number of simultaneous prefetches ++ to a large constant to reflect this (it probably is not a good idea not ++ to limit number of prefetches at all, as their execution also takes some ++ time). */ ++ 100, /* number of parallel prefetches */ ++ 2, /* Branch cost */ ++ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (4), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (19), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (2), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ ++ ++ /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for ++ very small blocks it is better to use loop. For large blocks, libcall can ++ do nontemporary accesses and beat inline considerably. */ ++ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}, ++ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ {{libcall, {{8, loop}, {24, unrolled_loop}, ++ {2048, rep_prefix_4_byte}, {-1, libcall}}}, ++ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ 4, /* scalar_stmt_cost. */ ++ 2, /* scalar load_cost. */ ++ 2, /* scalar_store_cost. */ ++ 6, /* vec_stmt_cost. */ ++ 0, /* vec_to_scalar_cost. */ ++ 2, /* scalar_to_vec_cost. */ ++ 2, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 2, /* vec_store_cost. */ ++ 2, /* cond_taken_branch_cost. 
*/ ++ 1, /* cond_not_taken_branch_cost. */ ++}; ++ ++static const ++struct processor_costs pentium4_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (3), /* cost of a lea instruction */ ++ COSTS_N_INSNS (4), /* variable shift costs */ ++ COSTS_N_INSNS (4), /* constant shift costs */ ++ {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (15), /* HI */ ++ COSTS_N_INSNS (15), /* SI */ ++ COSTS_N_INSNS (15), /* DI */ ++ COSTS_N_INSNS (15)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (56), /* HI */ ++ COSTS_N_INSNS (56), /* SI */ ++ COSTS_N_INSNS (56), /* DI */ ++ COSTS_N_INSNS (56)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 16, /* "large" insn */ ++ 6, /* MOVE_RATIO */ ++ 2, /* cost for loading QImode using movzbl */ ++ {4, 5, 4}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {2, 3, 2}, /* cost of storing integer registers */ ++ 2, /* cost of reg,reg fld/fst */ ++ {2, 2, 6}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {4, 4, 6}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {2, 2}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {2, 2}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 12, /* cost of moving SSE register */ ++ {12, 12, 12}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {2, 2, 8}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 10, /* MMX or SSE register to integer */ ++ 8, /* size of l1 cache. */ ++ 256, /* size of l2 cache. */ ++ 64, /* size of prefetch block */ ++ 6, /* number of parallel prefetches */ ++ 2, /* Branch cost */ ++ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (7), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (43), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (2), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ ++ {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte}, ++ {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs nocona_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (10), /* HI */ ++ COSTS_N_INSNS (10), /* SI */ ++ COSTS_N_INSNS (10), /* DI */ ++ COSTS_N_INSNS (10)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (66), /* HI */ ++ COSTS_N_INSNS (66), /* SI */ ++ COSTS_N_INSNS (66), /* DI */ ++ COSTS_N_INSNS (66)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 16, /* "large" insn */ ++ 17, /* MOVE_RATIO */ ++ 4, /* cost for loading QImode using movzbl */ ++ {4, 4, 4}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {4, 4, 4}, /* cost of storing integer registers */ ++ 3, /* cost of reg,reg fld/fst */ ++ {12, 12, 12}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {4, 4, 4}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 6, /* cost of moving MMX register */ ++ {12, 12}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {12, 12}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 6, /* cost of moving SSE register */ ++ {12, 12, 12}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {12, 12, 12}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 8, /* MMX or SSE register to integer */ ++ 8, /* size of l1 cache. */ ++ 1024, /* size of l2 cache. */ ++ 128, /* size of prefetch block */ ++ 8, /* number of parallel prefetches */ ++ 1, /* Branch cost */ ++ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (8), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (40), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (3), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (3), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ ++ {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}}, ++ {libcall, {{32, loop}, {20000, rep_prefix_8_byte}, ++ {100000, unrolled_loop}, {-1, libcall}}}}, ++ {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte}, ++ {-1, libcall}}}, ++ {libcall, {{24, loop}, {64, unrolled_loop}, ++ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs core2_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (3), /* HI */ ++ COSTS_N_INSNS (3), /* SI */ ++ COSTS_N_INSNS (3), /* DI */ ++ COSTS_N_INSNS (3)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (22), /* HI */ ++ COSTS_N_INSNS (22), /* SI */ ++ COSTS_N_INSNS (22), /* DI */ ++ COSTS_N_INSNS (22)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 16, /* MOVE_RATIO */ ++ 2, /* cost for loading QImode using movzbl */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {4, 4, 4}, /* cost of storing integer registers */ ++ 2, /* cost of reg,reg fld/fst */ ++ {6, 6, 6}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {4, 4, 4}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {6, 6}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {4, 4}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {6, 6, 6}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {4, 4, 4}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 2, /* MMX or SSE register to integer */ ++ 32, /* size of l1 cache. */ ++ 2048, /* size of l2 cache. */ ++ 128, /* size of prefetch block */ ++ 8, /* number of parallel prefetches */ ++ 3, /* Branch cost */ ++ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (5), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (32), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (58), /* cost of FSQRT instruction. */ ++ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, ++ {libcall, {{32, loop}, {64, rep_prefix_4_byte}, ++ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ {{libcall, {{8, loop}, {15, unrolled_loop}, ++ {2048, rep_prefix_4_byte}, {-1, libcall}}}, ++ {libcall, {{24, loop}, {32, unrolled_loop}, ++ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. 
*/ ++}; ++ ++static const ++struct processor_costs atom_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (4), /* HI */ ++ COSTS_N_INSNS (3), /* SI */ ++ COSTS_N_INSNS (4), /* DI */ ++ COSTS_N_INSNS (2)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (26), /* HI */ ++ COSTS_N_INSNS (42), /* SI */ ++ COSTS_N_INSNS (74), /* DI */ ++ COSTS_N_INSNS (74)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 17, /* MOVE_RATIO */ ++ 2, /* cost for loading QImode using movzbl */ ++ {4, 4, 4}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {4, 4, 4}, /* cost of storing integer registers */ ++ 4, /* cost of reg,reg fld/fst */ ++ {12, 12, 12}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {6, 6, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {8, 8}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {8, 8, 8}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {8, 8, 8}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 5, /* MMX or SSE register to integer */ ++ 32, /* size of l1 cache. */ ++ 256, /* size of l2 cache. */ ++ 64, /* size of prefetch block */ ++ 6, /* number of parallel prefetches */ ++ 3, /* Branch cost */ ++ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (8), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (20), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (8), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ ++ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, ++ {libcall, {{32, loop}, {64, rep_prefix_4_byte}, ++ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ {{libcall, {{8, loop}, {15, unrolled_loop}, ++ {2048, rep_prefix_4_byte}, {-1, libcall}}}, ++ {libcall, {{24, loop}, {32, unrolled_loop}, ++ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. */ ++}; ++ ++/* Generic64 should produce code tuned for Nocona and K8. */ ++static const ++struct processor_costs generic64_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ /* On all chips taken into consideration lea is 2 cycles and more. With ++ this cost however our current implementation of synth_mult results in ++ use of unnecessary temporary registers causing regression on several ++ SPECfp benchmarks. 
*/ ++ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (4), /* HI */ ++ COSTS_N_INSNS (3), /* SI */ ++ COSTS_N_INSNS (4), /* DI */ ++ COSTS_N_INSNS (2)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (26), /* HI */ ++ COSTS_N_INSNS (42), /* SI */ ++ COSTS_N_INSNS (74), /* DI */ ++ COSTS_N_INSNS (74)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 17, /* MOVE_RATIO */ ++ 4, /* cost for loading QImode using movzbl */ ++ {4, 4, 4}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {4, 4, 4}, /* cost of storing integer registers */ ++ 4, /* cost of reg,reg fld/fst */ ++ {12, 12, 12}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {6, 6, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {8, 8}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {8, 8, 8}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {8, 8, 8}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 5, /* MMX or SSE register to integer */ ++ 32, /* size of l1 cache. */ ++ 512, /* size of l2 cache. */ ++ 64, /* size of prefetch block */ ++ 6, /* number of parallel prefetches */ ++ /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value ++ is increased to perhaps more appropriate value of 5. */ ++ 3, /* Branch cost */ ++ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (8), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (20), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (8), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ ++ {DUMMY_STRINGOP_ALGS, ++ {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ {DUMMY_STRINGOP_ALGS, ++ {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. */ ++}; ++ ++/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. 
*/ ++static const ++struct processor_costs generic32_cost = { ++ COSTS_N_INSNS (1), /* cost of an add instruction */ ++ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ ++ COSTS_N_INSNS (1), /* variable shift costs */ ++ COSTS_N_INSNS (1), /* constant shift costs */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ ++ COSTS_N_INSNS (4), /* HI */ ++ COSTS_N_INSNS (3), /* SI */ ++ COSTS_N_INSNS (4), /* DI */ ++ COSTS_N_INSNS (2)}, /* other */ ++ 0, /* cost of multiply per each bit set */ ++ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ ++ COSTS_N_INSNS (26), /* HI */ ++ COSTS_N_INSNS (42), /* SI */ ++ COSTS_N_INSNS (74), /* DI */ ++ COSTS_N_INSNS (74)}, /* other */ ++ COSTS_N_INSNS (1), /* cost of movsx */ ++ COSTS_N_INSNS (1), /* cost of movzx */ ++ 8, /* "large" insn */ ++ 17, /* MOVE_RATIO */ ++ 4, /* cost for loading QImode using movzbl */ ++ {4, 4, 4}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {4, 4, 4}, /* cost of storing integer registers */ ++ 4, /* cost of reg,reg fld/fst */ ++ {12, 12, 12}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode */ ++ {6, 6, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode */ ++ 2, /* cost of moving MMX register */ ++ {8, 8}, /* cost of loading MMX registers ++ in SImode and DImode */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode */ ++ 2, /* cost of moving SSE register */ ++ {8, 8, 8}, /* cost of loading SSE registers ++ in SImode, DImode and TImode */ ++ {8, 8, 8}, /* cost of storing SSE registers ++ in SImode, DImode and TImode */ ++ 5, /* MMX or SSE register to integer */ ++ 32, /* size of l1 cache. */ ++ 256, /* size of l2 cache. */ ++ 64, /* size of prefetch block */ ++ 6, /* number of parallel prefetches */ ++ 3, /* Branch cost */ ++ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (8), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (20), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (8), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ ++ {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}}, ++ DUMMY_STRINGOP_ALGS}, ++ 1, /* scalar_stmt_cost. */ ++ 1, /* scalar load_cost. */ ++ 1, /* scalar_store_cost. */ ++ 1, /* vec_stmt_cost. */ ++ 1, /* vec_to_scalar_cost. */ ++ 1, /* scalar_to_vec_cost. */ ++ 1, /* vec_align_load_cost. */ ++ 2, /* vec_unalign_load_cost. */ ++ 1, /* vec_store_cost. */ ++ 3, /* cond_taken_branch_cost. */ ++ 1, /* cond_not_taken_branch_cost. */ ++}; ++ ++const struct processor_costs *ix86_cost = &pentium_cost; ++ ++/* Processor feature/optimization bitmasks. */ ++#define m_386 (1< to_allocate <- FRAME_POINTER ++ [frame] ( ++ ) ++ [padding2] / ++ */ ++struct ix86_frame ++{ ++ int padding0; ++ int nsseregs; ++ int nregs; ++ int padding1; ++ int va_arg_size; ++ HOST_WIDE_INT frame; ++ int padding2; ++ int outgoing_arguments_size; ++ int red_zone_size; ++ ++ HOST_WIDE_INT to_allocate; ++ /* The offsets relative to ARG_POINTER. */ ++ HOST_WIDE_INT frame_pointer_offset; ++ HOST_WIDE_INT hard_frame_pointer_offset; ++ HOST_WIDE_INT stack_pointer_offset; ++ ++ /* When save_regs_using_mov is set, emit prologue using ++ move instead of push instructions. */ ++ bool save_regs_using_mov; ++}; ++ ++/* Code model option. */ ++enum cmodel ix86_cmodel; ++/* Asm dialect. 
*/ ++enum asm_dialect ix86_asm_dialect = ASM_ATT; ++/* TLS dialects. */ ++enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; ++ ++/* Which unit we are generating floating point math for. */ ++enum fpmath_unit ix86_fpmath; ++ ++/* Which cpu are we optimizing for. */ ++enum processor_type ix86_tune; ++ ++/* Which instruction set architecture to use. */ ++enum processor_type ix86_arch; ++ ++/* true if sse prefetch instruction is not NOOP. */ ++int x86_prefetch_sse; ++ ++/* -mstackrealign option */ ++extern int ix86_force_align_arg_pointer; ++static const char ix86_force_align_arg_pointer_string[] ++ = "force_align_arg_pointer"; ++ ++/* Preferred alignment for stack boundary in bits. */ ++unsigned int ix86_preferred_stack_boundary; ++ ++/* The abi used by target. */ ++enum calling_abi ix86_abi; ++ ++/* Values 1-5: see jump.c */ ++int ix86_branch_cost; ++ ++/* Variables which are this size or smaller are put in the data/bss ++ or ldata/lbss sections. */ ++ ++int ix86_section_threshold = 65536; ++ ++/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ ++char internal_label_prefix[16]; ++int internal_label_prefix_len; ++ ++/* Fence to use after loop using movnt. */ ++tree x86_mfence; ++ ++/* Register class used for passing given 64bit part of the argument. ++ These represent classes as documented by the PS ABI, with the exception ++ of SSESF, SSEDF classes, that are basically SSE class, just gcc will ++ use SF or DFmode move instead of DImode to avoid reformatting penalties. ++ ++ Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves ++ whenever possible (upper half does contain padding). */ ++enum x86_64_reg_class ++ { ++ X86_64_NO_CLASS, ++ X86_64_INTEGER_CLASS, ++ X86_64_INTEGERSI_CLASS, ++ X86_64_SSE_CLASS, ++ X86_64_SSESF_CLASS, ++ X86_64_SSEDF_CLASS, ++ X86_64_SSEUP_CLASS, ++ X86_64_X87_CLASS, ++ X86_64_X87UP_CLASS, ++ X86_64_COMPLEX_X87_CLASS, ++ X86_64_MEMORY_CLASS ++ }; ++ ++#define MAX_CLASSES 4 ++ ++ ++ ++enum ix86_function_specific_strings ++{ ++ IX86_FUNCTION_SPECIFIC_ARCH, ++ IX86_FUNCTION_SPECIFIC_TUNE, ++ IX86_FUNCTION_SPECIFIC_FPMATH, ++ IX86_FUNCTION_SPECIFIC_MAX ++}; ++ ++ ++/* The svr4 ABI for the i386 says that records and unions are returned ++ in memory. */ ++#ifndef DEFAULT_PCC_STRUCT_RETURN ++#define DEFAULT_PCC_STRUCT_RETURN 1 ++#endif ++ ++/* Bit flags that specify the ISA we are compiling for. */ ++int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT; ++ ++/* Define a set of ISAs which are available when a given ISA is ++ enabled. MMX and SSE ISAs are handled separately. */ ++ ++#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX ++#define OPTION_MASK_ISA_3DNOW_SET \ ++ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET) ++ ++#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE ++#define OPTION_MASK_ISA_SSE2_SET \ ++ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET) ++#define OPTION_MASK_ISA_SSE3_SET \ ++ (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET) ++#define OPTION_MASK_ISA_SSSE3_SET \ ++ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET) ++#define OPTION_MASK_ISA_SSE4_1_SET \ ++ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET) ++#define OPTION_MASK_ISA_SSE4_2_SET \ ++ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET) ++#define OPTION_MASK_ISA_AVX_SET \ ++ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET) ++#define OPTION_MASK_ISA_FMA_SET \ ++ (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET) ++ ++/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same ++ as -msse4.2. 
*/ ++#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET ++ ++#define OPTION_MASK_ISA_SSE4A_SET \ ++ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET) ++#define OPTION_MASK_ISA_FMA4_SET \ ++ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \ ++ | OPTION_MASK_ISA_AVX_SET) ++#define OPTION_MASK_ISA_XOP_SET \ ++ (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET) ++#define OPTION_MASK_ISA_LWP_SET \ ++ OPTION_MASK_ISA_LWP ++ ++/* AES and PCLMUL need SSE2 because they use xmm registers */ ++#define OPTION_MASK_ISA_AES_SET \ ++ (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET) ++#define OPTION_MASK_ISA_PCLMUL_SET \ ++ (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET) ++ ++#define OPTION_MASK_ISA_ABM_SET \ ++ (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) ++ ++#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT ++#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 ++#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF ++#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE ++#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32 ++ ++/* Define a set of ISAs which aren't available when a given ISA is ++ disabled. MMX and SSE ISAs are handled separately. */ ++ ++#define OPTION_MASK_ISA_MMX_UNSET \ ++ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET) ++#define OPTION_MASK_ISA_3DNOW_UNSET \ ++ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET) ++#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A ++ ++#define OPTION_MASK_ISA_SSE_UNSET \ ++ (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET) ++#define OPTION_MASK_ISA_SSE2_UNSET \ ++ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET) ++#define OPTION_MASK_ISA_SSE3_UNSET \ ++ (OPTION_MASK_ISA_SSE3 \ ++ | OPTION_MASK_ISA_SSSE3_UNSET \ ++ | OPTION_MASK_ISA_SSE4A_UNSET ) ++#define OPTION_MASK_ISA_SSSE3_UNSET \ ++ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET) ++#define OPTION_MASK_ISA_SSE4_1_UNSET \ ++ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET) ++#define OPTION_MASK_ISA_SSE4_2_UNSET \ ++ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET ) ++#define OPTION_MASK_ISA_AVX_UNSET \ ++ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \ ++ | OPTION_MASK_ISA_FMA4_UNSET) ++#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA ++ ++/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same ++ as -mno-sse4.1. */ ++#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET ++ ++#define OPTION_MASK_ISA_SSE4A_UNSET \ ++ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET) ++ ++#define OPTION_MASK_ISA_FMA4_UNSET \ ++ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET) ++#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP ++#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP ++ ++#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES ++#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL ++#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM ++#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT ++#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 ++#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF ++#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE ++#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32 ++ ++#if 0 ++/* Vectorization library interface and handlers. 
*/ ++tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; ++static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); ++static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); ++#endif ++ ++/* Processor target table, indexed by processor number */ ++struct ptt ++{ ++ const struct processor_costs *cost; /* Processor costs */ ++ const int align_loop; /* Default alignments. */ ++ const int align_loop_max_skip; ++ const int align_jump; ++ const int align_jump_max_skip; ++ const int align_func; ++}; ++ ++static const struct ptt processor_target_table[PROCESSOR_max] = ++{ ++ {&i386_cost, 4, 3, 4, 3, 4}, ++ {&i486_cost, 16, 15, 16, 15, 16}, ++ {&pentium_cost, 16, 7, 16, 7, 16}, ++ {&pentiumpro_cost, 16, 15, 16, 10, 16}, ++ {&geode_cost, 0, 0, 0, 0, 0}, ++ {&k6_cost, 32, 7, 32, 7, 32}, ++ {&athlon_cost, 16, 7, 16, 7, 16}, ++ {&pentium4_cost, 0, 0, 0, 0, 0}, ++ {&k8_cost, 16, 7, 16, 7, 16}, ++ {&nocona_cost, 0, 0, 0, 0, 0}, ++ {&core2_cost, 16, 10, 16, 10, 16}, ++ {&generic32_cost, 16, 7, 16, 7, 16}, ++ {&generic64_cost, 16, 10, 16, 10, 16}, ++ {&amdfam10_cost, 32, 24, 32, 7, 32}, ++ {&atom_cost, 16, 7, 16, 7, 16} ++}; ++ ++static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = ++{ ++ "generic", ++ "i386", ++ "i486", ++ "pentium", ++ "pentium-mmx", ++ "pentiumpro", ++ "pentium2", ++ "pentium3", ++ "pentium4", ++ "pentium-m", ++ "prescott", ++ "nocona", ++ "core2", ++ "atom", ++ "geode", ++ "k6", ++ "k6-2", ++ "k6-3", ++ "athlon", ++ "athlon-4", ++ "k8", ++ "amdfam10" ++}; ++ ++ ++/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. ++ But in the case of vector types, it is some vector mode. ++ ++ When we have only some of our vector isa extensions enabled, then there ++ are some modes for which vector_mode_supported_p is false. For these ++ modes, the generic vector support in gcc will choose some non-vector mode ++ in order to implement the type. By computing the natural mode, we'll ++ select the proper ABI location for the operand and not depend on whatever ++ the middle-end decides to do with these vector types. ++ ++ The midde-end can't deal with the vector types > 16 bytes. In this ++ case, we return the original mode and warn ABI change if CUM isn't ++ NULL. */ ++ ++enum machine_mode ++type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum) ++{ ++ enum machine_mode mode = TYPE_MODE (type); ++ ++ if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) ++ { ++ HOST_WIDE_INT size = int_size_in_bytes (type); ++ if ((size == 8 || size == 16 || size == 32) ++ /* ??? Generic code allows us to create width 1 vectors. Ignore. */ ++ && TYPE_VECTOR_SUBPARTS (type) > 1) ++ { ++ enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); ++ ++ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) ++ mode = MIN_MODE_VECTOR_FLOAT; ++ else ++ mode = MIN_MODE_VECTOR_INT; ++ ++ /* Get the mode which has this inner mode and number of units. */ ++ for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) ++ if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) ++ && GET_MODE_INNER (mode) == innermode) ++ { ++ if (size == 32 && !TARGET_AVX) ++ { ++ static bool warnedavx; ++ ++ if (cum ++ && !warnedavx ++ && cum->warn_avx) ++ { ++ warnedavx = true; ++ warning (0, "AVX vector argument without AVX " ++ "enabled changes the ABI"); ++ } ++ return TYPE_MODE (type); ++ } ++ else ++ return mode; ++ } ++ ++ gcc_unreachable (); ++ } ++ } ++ ++ return mode; ++} ++ ++/* x86-64 register passing implementation. See x86-64 ABI for details. 
Goal ++ of this code is to classify each 8bytes of incoming argument by the register ++ class and assign registers accordingly. */ ++ ++/* Return the union class of CLASS1 and CLASS2. ++ See the x86-64 PS ABI for details. */ ++ ++static enum x86_64_reg_class ++merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) ++{ ++ /* Rule #1: If both classes are equal, this is the resulting class. */ ++ if (class1 == class2) ++ return class1; ++ ++ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is ++ the other class. */ ++ if (class1 == X86_64_NO_CLASS) ++ return class2; ++ if (class2 == X86_64_NO_CLASS) ++ return class1; ++ ++ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ ++ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) ++ return X86_64_MEMORY_CLASS; ++ ++ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ ++ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) ++ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) ++ return X86_64_INTEGERSI_CLASS; ++ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS ++ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) ++ return X86_64_INTEGER_CLASS; ++ ++ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, ++ MEMORY is used. */ ++ if (class1 == X86_64_X87_CLASS ++ || class1 == X86_64_X87UP_CLASS ++ || class1 == X86_64_COMPLEX_X87_CLASS ++ || class2 == X86_64_X87_CLASS ++ || class2 == X86_64_X87UP_CLASS ++ || class2 == X86_64_COMPLEX_X87_CLASS) ++ return X86_64_MEMORY_CLASS; ++ ++ /* Rule #6: Otherwise class SSE is used. */ ++ return X86_64_SSE_CLASS; ++} ++ ++/* Classify the argument of type TYPE and mode MODE. ++ CLASSES will be filled by the register class used to pass each word ++ of the operand. The number of words is returned. In case the parameter ++ should be passed in memory, 0 is returned. As a special case for zero ++ sized containers, classes[0] will be NO_CLASS and 1 is returned. ++ ++ BIT_OFFSET is used internally for handling records and specifies offset ++ of the offset in bits modulo 256 to avoid overflow cases. ++ ++ See the x86-64 PS ABI for details. ++*/ ++ ++int ++classify_argument (enum machine_mode mode, const_tree type, ++ enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) ++{ ++ HOST_WIDE_INT bytes = ++ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); ++ int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; ++ ++ /* Variable sized entities are always passed/returned in memory. */ ++ if (bytes < 0) ++ return 0; ++ ++ if (mode != VOIDmode ++ && targetm.calls.must_pass_in_stack (mode, type)) ++ return 0; ++ ++ if (type && AGGREGATE_TYPE_P (type)) ++ { ++ int i; ++ tree field; ++ enum x86_64_reg_class subclasses[MAX_CLASSES]; ++ ++ /* On x86-64 we pass structures larger than 32 bytes on the stack. */ ++ if (bytes > 32) ++ return 0; ++ ++ for (i = 0; i < words; i++) ++ classes[i] = X86_64_NO_CLASS; ++ ++ /* Zero sized arrays or structures are NO_CLASS. We return 0 to ++ signalize memory class, so handle it as special case. */ ++ if (!words) ++ { ++ classes[0] = X86_64_NO_CLASS; ++ return 1; ++ } ++ ++ /* Classify each field of record and merge classes. */ ++ switch (TREE_CODE (type)) ++ { ++ case RECORD_TYPE: ++ /* And now merge the fields of structure. 
*/ ++ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ int num; ++ ++ if (TREE_TYPE (field) == error_mark_node) ++ continue; ++ ++ /* Bitfields are always classified as integer. Handle them ++ early, since later code would consider them to be ++ misaligned integers. */ ++ if (DECL_BIT_FIELD (field)) ++ { ++ for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; ++ i < ((int_bit_position (field) + (bit_offset % 64)) ++ + tree_low_cst (DECL_SIZE (field), 0) ++ + 63) / 8 / 8; i++) ++ classes[i] = ++ merge_classes (X86_64_INTEGER_CLASS, ++ classes[i]); ++ } ++ else ++ { ++ int pos; ++ ++ type = TREE_TYPE (field); ++ ++ /* Flexible array member is ignored. */ ++ if (TYPE_MODE (type) == BLKmode ++ && TREE_CODE (type) == ARRAY_TYPE ++ && TYPE_SIZE (type) == NULL_TREE ++ && TYPE_DOMAIN (type) != NULL_TREE ++ && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) ++ == NULL_TREE)) ++ { ++ static bool warned; ++ ++ if (!warned && warn_psabi) ++ { ++ warned = true; ++ inform (input_location, ++ "The ABI of passing struct with" ++ " a flexible array member has" ++ " changed in GCC 4.4"); ++ } ++ continue; ++ } ++ num = classify_argument (TYPE_MODE (type), type, ++ subclasses, ++ (int_bit_position (field) ++ + bit_offset) % 256); ++ if (!num) ++ return 0; ++ pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; ++ for (i = 0; i < num && (i + pos) < words; i++) ++ classes[i + pos] = ++ merge_classes (subclasses[i], classes[i + pos]); ++ } ++ } ++ } ++ break; ++ ++ case ARRAY_TYPE: ++ /* Arrays are handled as small records. */ ++ { ++ int num; ++ num = classify_argument (TYPE_MODE (TREE_TYPE (type)), ++ TREE_TYPE (type), subclasses, bit_offset); ++ if (!num) ++ return 0; ++ ++ /* The partial classes are now full classes. */ ++ if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) ++ subclasses[0] = X86_64_SSE_CLASS; ++ if (subclasses[0] == X86_64_INTEGERSI_CLASS ++ && !((bit_offset % 64) == 0 && bytes == 4)) ++ subclasses[0] = X86_64_INTEGER_CLASS; ++ ++ for (i = 0; i < words; i++) ++ classes[i] = subclasses[i % num]; ++ ++ break; ++ } ++ case UNION_TYPE: ++ case QUAL_UNION_TYPE: ++ /* Unions are similar to RECORD_TYPE but offset is always 0. ++ */ ++ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ int num; ++ ++ if (TREE_TYPE (field) == error_mark_node) ++ continue; ++ ++ num = classify_argument (TYPE_MODE (TREE_TYPE (field)), ++ TREE_TYPE (field), subclasses, ++ bit_offset); ++ if (!num) ++ return 0; ++ for (i = 0; i < num; i++) ++ classes[i] = merge_classes (subclasses[i], classes[i]); ++ } ++ } ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (words > 2) ++ { ++ /* When size > 16 bytes, if the first one isn't ++ X86_64_SSE_CLASS or any other ones aren't ++ X86_64_SSEUP_CLASS, everything should be passed in ++ memory. */ ++ if (classes[0] != X86_64_SSE_CLASS) ++ return 0; ++ ++ for (i = 1; i < words; i++) ++ if (classes[i] != X86_64_SSEUP_CLASS) ++ return 0; ++ } ++ ++ /* Final merger cleanup. */ ++ for (i = 0; i < words; i++) ++ { ++ /* If one class is MEMORY, everything should be passed in ++ memory. */ ++ if (classes[i] == X86_64_MEMORY_CLASS) ++ return 0; ++ ++ /* The X86_64_SSEUP_CLASS should be always preceded by ++ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. 
*/ ++ if (classes[i] == X86_64_SSEUP_CLASS ++ && classes[i - 1] != X86_64_SSE_CLASS ++ && classes[i - 1] != X86_64_SSEUP_CLASS) ++ { ++ /* The first one should never be X86_64_SSEUP_CLASS. */ ++ gcc_assert (i != 0); ++ classes[i] = X86_64_SSE_CLASS; ++ } ++ ++ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, ++ everything should be passed in memory. */ ++ if (classes[i] == X86_64_X87UP_CLASS ++ && (classes[i - 1] != X86_64_X87_CLASS)) ++ { ++ static bool warned; ++ ++ /* The first one should never be X86_64_X87UP_CLASS. */ ++ gcc_assert (i != 0); ++ if (!warned && warn_psabi) ++ { ++ warned = true; ++ inform (input_location, ++ "The ABI of passing union with long double" ++ " has changed in GCC 4.4"); ++ } ++ return 0; ++ } ++ } ++ return words; ++ } ++ ++ /* Compute alignment needed. We align all types to natural boundaries with ++ exception of XFmode that is aligned to 64bits. */ ++ if (mode != VOIDmode && mode != BLKmode) ++ { ++ int mode_alignment = GET_MODE_BITSIZE (mode); ++ ++ if (mode == XFmode) ++ mode_alignment = 128; ++ else if (mode == XCmode) ++ mode_alignment = 256; ++ if (COMPLEX_MODE_P (mode)) ++ mode_alignment /= 2; ++ /* Misaligned fields are always returned in memory. */ ++ if (bit_offset % mode_alignment) ++ return 0; ++ } ++ ++ /* for V1xx modes, just use the base mode */ ++ if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode ++ && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) ++ mode = GET_MODE_INNER (mode); ++ ++ /* Classification of atomic types. */ ++ switch (mode) ++ { ++ case SDmode: ++ case DDmode: ++ classes[0] = X86_64_SSE_CLASS; ++ return 1; ++ case TDmode: ++ classes[0] = X86_64_SSE_CLASS; ++ classes[1] = X86_64_SSEUP_CLASS; ++ return 2; ++ case DImode: ++ case SImode: ++ case HImode: ++ case QImode: ++ case CSImode: ++ case CHImode: ++ case CQImode: ++ { ++ int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode); ++ ++ if (size <= 32) ++ { ++ classes[0] = X86_64_INTEGERSI_CLASS; ++ return 1; ++ } ++ else if (size <= 64) ++ { ++ classes[0] = X86_64_INTEGER_CLASS; ++ return 1; ++ } ++ else if (size <= 64+32) ++ { ++ classes[0] = X86_64_INTEGER_CLASS; ++ classes[1] = X86_64_INTEGERSI_CLASS; ++ return 2; ++ } ++ else if (size <= 64+64) ++ { ++ classes[0] = classes[1] = X86_64_INTEGER_CLASS; ++ return 2; ++ } ++ else ++ gcc_unreachable (); ++ } ++ case CDImode: ++ case TImode: ++ classes[0] = classes[1] = X86_64_INTEGER_CLASS; ++ return 2; ++ case COImode: ++ case OImode: ++ /* OImode shouldn't be used directly. 
*/
++      gcc_unreachable ();
++    case CTImode:
++      return 0;
++    case SFmode:
++      if (!(bit_offset % 64))
++        classes[0] = X86_64_SSESF_CLASS;
++      else
++        classes[0] = X86_64_SSE_CLASS;
++      return 1;
++    case DFmode:
++      classes[0] = X86_64_SSEDF_CLASS;
++      return 1;
++    case XFmode:
++      classes[0] = X86_64_X87_CLASS;
++      classes[1] = X86_64_X87UP_CLASS;
++      return 2;
++    case TFmode:
++      classes[0] = X86_64_SSE_CLASS;
++      classes[1] = X86_64_SSEUP_CLASS;
++      return 2;
++    case SCmode:
++      classes[0] = X86_64_SSE_CLASS;
++      if (!(bit_offset % 64))
++        return 1;
++      else
++        {
++          static bool warned;
++
++          if (!warned && warn_psabi)
++            {
++              warned = true;
++              inform (input_location,
++                      "The ABI of passing structure with complex float"
++                      " member has changed in GCC 4.4");
++            }
++          classes[1] = X86_64_SSESF_CLASS;
++          return 2;
++        }
++    case DCmode:
++      classes[0] = X86_64_SSEDF_CLASS;
++      classes[1] = X86_64_SSEDF_CLASS;
++      return 2;
++    case XCmode:
++      classes[0] = X86_64_COMPLEX_X87_CLASS;
++      return 1;
++    case TCmode:
++      /* This mode is larger than 16 bytes.  */
++      return 0;
++    case V8SFmode:
++    case V8SImode:
++    case V32QImode:
++    case V16HImode:
++    case V4DFmode:
++    case V4DImode:
++      classes[0] = X86_64_SSE_CLASS;
++      classes[1] = X86_64_SSEUP_CLASS;
++      classes[2] = X86_64_SSEUP_CLASS;
++      classes[3] = X86_64_SSEUP_CLASS;
++      return 4;
++    case V4SFmode:
++    case V4SImode:
++    case V16QImode:
++    case V8HImode:
++    case V2DFmode:
++    case V2DImode:
++      classes[0] = X86_64_SSE_CLASS;
++      classes[1] = X86_64_SSEUP_CLASS;
++      return 2;
++    case V1TImode:
++    case V1DImode:
++    case V2SFmode:
++    case V2SImode:
++    case V4HImode:
++    case V8QImode:
++      classes[0] = X86_64_SSE_CLASS;
++      return 1;
++    case BLKmode:
++    case VOIDmode:
++      return 0;
++    default:
++      gcc_assert (VECTOR_MODE_P (mode));
++
++      if (bytes > 16)
++        return 0;
++
++      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
++
++      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
++        classes[0] = X86_64_INTEGERSI_CLASS;
++      else
++        classes[0] = X86_64_INTEGER_CLASS;
++      classes[1] = X86_64_INTEGER_CLASS;
++      return 1 + (bytes > 8);
++    }
++}
++
++/* Examine the argument and return the number of registers required in each
++   class.  Return 0 iff the parameter should be passed in memory.  */
++int
++examine_argument (enum machine_mode mode, const_tree type, int in_return,
++                  int *int_nregs, int *sse_nregs)
++{
++  enum x86_64_reg_class regclass[MAX_CLASSES];
++  int n = classify_argument (mode, type, regclass, 0);
++
++  *int_nregs = 0;
++  *sse_nregs = 0;
++  if (!n)
++    return 0;
++  for (n--; n >= 0; n--)
++    switch (regclass[n])
++      {
++      case X86_64_INTEGER_CLASS:
++      case X86_64_INTEGERSI_CLASS:
++        (*int_nregs)++;
++        break;
++      case X86_64_SSE_CLASS:
++      case X86_64_SSESF_CLASS:
++      case X86_64_SSEDF_CLASS:
++        (*sse_nregs)++;
++        break;
++      case X86_64_NO_CLASS:
++      case X86_64_SSEUP_CLASS:
++        break;
++      case X86_64_X87_CLASS:
++      case X86_64_X87UP_CLASS:
++        if (!in_return)
++          return 0;
++        break;
++      case X86_64_COMPLEX_X87_CLASS:
++        return in_return ? 2 : 0;
++      case X86_64_MEMORY_CLASS:
++        gcc_unreachable ();
++      }
++  return 1;
++}
++
++/* Return true when TYPE should be 128bit aligned for 32bit argument passing
++   ABI.
*/
++bool
++contains_aligned_value_p (tree type)
++{
++  enum machine_mode mode = TYPE_MODE (type);
++  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
++       || mode == TDmode
++       || mode == TFmode
++       || mode == TCmode)
++      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
++    return true;
++  if (TYPE_ALIGN (type) < 128)
++    return false;
++
++  if (AGGREGATE_TYPE_P (type))
++    {
++      /* Walk the aggregates recursively.  */
++      switch (TREE_CODE (type))
++        {
++        case RECORD_TYPE:
++        case UNION_TYPE:
++        case QUAL_UNION_TYPE:
++          {
++            tree field;
++
++            /* Walk all the structure fields.  */
++            for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
++              {
++                if (TREE_CODE (field) == FIELD_DECL
++                    && contains_aligned_value_p (TREE_TYPE (field)))
++                  return true;
++              }
++            break;
++          }
++
++        case ARRAY_TYPE:
++          /* Just for use if some languages pass arrays by value.  */
++          if (contains_aligned_value_p (TREE_TYPE (type)))
++            return true;
++          break;
++
++        default:
++          gcc_unreachable ();
++        }
++    }
++  return false;
++}
+
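A standalone illustration of the eightbyte merging performed by merge_classes() above (not part of the patch itself): the sketch below models only the simplified rule order spelled out in those comments, deliberately omits the INTEGERSI/SSESF refinement and the X87UP/COMPLEX_X87 variants, uses hypothetical enum names rather than GCC's x86_64_reg_class, and builds with any ISO C compiler.

    /* Minimal, hypothetical model of the merging rules in merge_classes().
       Plain ISO C, no GCC internals.  */
    #include <stdio.h>

    enum reg_class { NO_CLASS, INTEGER, SSE, X87, MEMORY };

    /* Rule order follows the numbered comments above: equal classes win,
       NO_CLASS yields to the other class, MEMORY forces memory, INTEGER
       wins next, any X87 flavour otherwise forces memory, SSE is the
       fallback.  */
    static enum reg_class
    merge (enum reg_class a, enum reg_class b)
    {
      if (a == b) return a;
      if (a == NO_CLASS) return b;
      if (b == NO_CLASS) return a;
      if (a == MEMORY || b == MEMORY) return MEMORY;
      if (a == INTEGER || b == INTEGER) return INTEGER;
      if (a == X87 || b == X87) return MEMORY;
      return SSE;
    }

    int main (void)
    {
      /* union { double d; long l; } occupies a single eightbyte: the double
         member classifies as SSE, the long as INTEGER, and rule #4 makes
         the merged class INTEGER.  */
      enum reg_class eightbyte = merge (SSE, INTEGER);
      printf ("union eightbyte -> %s\n",
              eightbyte == INTEGER ? "INTEGER (general-purpose register)"
                                   : "something else");
      return 0;
    }

Run under any C compiler, the sketch reports INTEGER for that union, which is what the psABI mandates: a union of double and long travels in a general-purpose register rather than an SSE register.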