diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/bindings/python/clang/cindex.py llvm-toolchain-snapshot-6.0~svn316003/clang/bindings/python/clang/cindex.py --- llvm-toolchain-snapshot-6.0~svn315865/clang/bindings/python/clang/cindex.py 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/bindings/python/clang/cindex.py 2017-10-17 14:41:54.000000000 +0000 @@ -1587,6 +1587,16 @@ return StorageClass.from_id(self._storage_class) @property + def availability(self): + """ + Retrieves the availability of the entity pointed at by the cursor. + """ + if not hasattr(self, '_availability'): + self._availability = conf.lib.clang_getCursorAvailability(self) + + return AvailabilityKind.from_id(self._availability) + + @property def access_specifier(self): """ Retrieves the access specifier (if any) of the entity pointed at by the @@ -1923,6 +1933,24 @@ StorageClass.AUTO = StorageClass(6) StorageClass.REGISTER = StorageClass(7) +### Availability Kinds ### + +class AvailabilityKind(BaseEnumeration): + """ + Describes the availability of an entity. + """ + + # The unique kind objects, indexed by id. 
+ _kinds = [] + _name_map = None + + def __repr__(self): + return 'AvailabilityKind.%s' % (self.name,) + +AvailabilityKind.AVAILABLE = AvailabilityKind(0) +AvailabilityKind.DEPRECATED = AvailabilityKind(1) +AvailabilityKind.NOT_AVAILABLE = AvailabilityKind(2) +AvailabilityKind.NOT_ACCESSIBLE = AvailabilityKind(3) ### C++ access specifiers ### @@ -3491,6 +3519,10 @@ [TranslationUnit, SourceLocation], Cursor), + ("clang_getCursorAvailability", + [Cursor], + c_int), + ("clang_getCursorDefinition", [Cursor], Cursor, @@ -4106,6 +4138,7 @@ register_enumerations() __all__ = [ + 'AvailabilityKind', 'Config', 'CodeCompletionResults', 'CompilationDatabase', diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/bindings/python/tests/cindex/test_cursor.py llvm-toolchain-snapshot-6.0~svn316003/clang/bindings/python/tests/cindex/test_cursor.py --- llvm-toolchain-snapshot-6.0~svn315865/clang/bindings/python/tests/cindex/test_cursor.py 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/bindings/python/tests/cindex/test_cursor.py 2017-10-17 14:41:54.000000000 +0000 @@ -1,6 +1,7 @@ import ctypes import gc +from clang.cindex import AvailabilityKind from clang.cindex import CursorKind from clang.cindex import TemplateArgumentKind from clang.cindex import TranslationUnit @@ -377,6 +378,26 @@ else: assert False, "Couldn't find annotation" +def test_annotation_template(): + annotation = '__attribute__ ((annotate("annotation")))' + for source, kind in [ + ('int foo (T value) %s;', CursorKind.FUNCTION_TEMPLATE), + ('class %s foo {};', CursorKind.CLASS_TEMPLATE), + ]: + source = 'template ' + (source % annotation) + tu = get_tu(source, lang="cpp") + + foo = get_cursor(tu, 'foo') + assert foo is not None + assert foo.kind == kind + + for c in foo.get_children(): + if c.kind == CursorKind.ANNOTATE_ATTR: + assert c.displayname == "annotation" + break + else: + assert False, "Couldn't find annotation for {}".format(kind) + def test_result_type(): tu = 
get_tu('int foo();') foo = get_cursor(tu, 'foo') @@ -385,6 +406,30 @@ t = foo.result_type assert t.kind == TypeKind.INT +def test_availability(): + tu = get_tu('class A { A(A const&) = delete; };', lang='cpp') + + # AvailabilityKind.AVAILABLE + cursor = get_cursor(tu, 'A') + assert cursor.kind == CursorKind.CLASS_DECL + assert cursor.availability == AvailabilityKind.AVAILABLE + + # AvailabilityKind.NOT_AVAILABLE + cursors = get_cursors(tu, 'A') + for c in cursors: + if c.kind == CursorKind.CONSTRUCTOR: + assert c.availability == AvailabilityKind.NOT_AVAILABLE + break + else: + assert False, "Could not find cursor for deleted constructor" + + # AvailabilityKind.DEPRECATED + tu = get_tu('void test() __attribute__((deprecated));', lang='cpp') + cursor = get_cursor(tu, 'test') + assert cursor.availability == AvailabilityKind.DEPRECATED + + # AvailabilityKind.NOT_ACCESSIBLE is only used in the code completion results + def test_get_tokens(): """Ensure we can map cursors back to tokens.""" tu = get_tu('int foo(int i);') diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/CMakeLists.txt llvm-toolchain-snapshot-6.0~svn316003/clang/CMakeLists.txt --- llvm-toolchain-snapshot-6.0~svn315865/clang/CMakeLists.txt 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/CMakeLists.txt 2017-10-17 14:41:54.000000000 +0000 @@ -235,6 +235,17 @@ set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING "Default OpenMP runtime used by -fopenmp.") +# OpenMP offloading requires at least sm_30 because we use shuffle instructions +# to generate efficient code for reductions. 
+set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING + "Default architecture for OpenMP offloading to Nvidia GPUs.") +string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}") +if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30) + message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30") + set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING + "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE) +endif() + set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING "Vendor-specific text for showing with version information.") diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/docs/ReleaseNotes.rst llvm-toolchain-snapshot-6.0~svn316003/clang/docs/ReleaseNotes.rst --- llvm-toolchain-snapshot-6.0~svn315865/clang/docs/ReleaseNotes.rst 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/docs/ReleaseNotes.rst 2017-10-17 14:41:54.000000000 +0000 @@ -78,6 +78,10 @@ when the signed integer is coerced to an unsigned type for the comparison. ``-Wsign-compare`` was adjusted not to warn in this case. +- ``-Wtautological-constant-compare`` is a new warning that warns on + tautological comparisons between an integer variable of type ``T`` and the + largest/smallest possible integer constant of that same type. + - ``-Wnull-pointer-arithmetic`` now warns about performing pointer arithmetic on a null pointer. Such pointer arithmetic has an undefined behavior if the offset is nonzero. 
It also now warns about arithmetic on a null pointer diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/AST/ASTContext.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/AST/ASTContext.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/AST/ASTContext.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/AST/ASTContext.h 2017-10-17 14:41:54.000000000 +0000 @@ -496,7 +496,7 @@ CXXABI *createCXXABI(const TargetInfo &T); /// \brief The logical -> physical address space map. - const LangAS::Map *AddrSpaceMap; + const LangASMap *AddrSpaceMap; /// \brief Address space map mangling must be used with language specific /// address spaces (e.g. OpenCL/CUDA) @@ -1070,7 +1070,7 @@ /// The resulting type has a union of the qualifiers from T and the address /// space. If T already has an address space specifier, it is silently /// replaced. - QualType getAddrSpaceQualType(QualType T, unsigned AddressSpace) const; + QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const; /// \brief Remove any existing address space on the type and returns the type /// with qualifiers intact (or that's the idea anyway) @@ -2363,14 +2363,14 @@ return getTargetAddressSpace(Q.getAddressSpace()); } - unsigned getTargetAddressSpace(unsigned AS) const; + unsigned getTargetAddressSpace(LangAS AS) const; /// Get target-dependent integer value for null pointer which is used for /// constant folding. 
uint64_t getTargetNullPointerValue(QualType QT) const; - bool addressSpaceMapManglingFor(unsigned AS) const { - return AddrSpaceMapMangling || AS >= LangAS::FirstTargetAddressSpace; + bool addressSpaceMapManglingFor(LangAS AS) const { + return AddrSpaceMapMangling || isTargetAddressSpace(AS); } private: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/AST/Type.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/AST/Type.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/AST/Type.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/AST/Type.h 2017-10-17 14:41:54.000000000 +0000 @@ -328,9 +328,11 @@ } bool hasAddressSpace() const { return Mask & AddressSpaceMask; } - unsigned getAddressSpace() const { return Mask >> AddressSpaceShift; } + LangAS getAddressSpace() const { + return static_cast(Mask >> AddressSpaceShift); + } bool hasTargetSpecificAddressSpace() const { - return getAddressSpace() >= LangAS::FirstTargetAddressSpace; + return isTargetAddressSpace(getAddressSpace()); } /// Get the address space attribute value to be printed by diagnostics. unsigned getAddressSpaceAttributePrintValue() const { @@ -338,22 +340,22 @@ // This function is not supposed to be used with language specific // address spaces. If that happens, the diagnostic message should consider // printing the QualType instead of the address space value. - assert(Addr == 0 || hasTargetSpecificAddressSpace()); - if (Addr) - return Addr - LangAS::FirstTargetAddressSpace; + assert(Addr == LangAS::Default || hasTargetSpecificAddressSpace()); + if (Addr != LangAS::Default) + return toTargetAddressSpace(Addr); // TODO: The diagnostic messages where Addr may be 0 should be fixed // since it cannot differentiate the situation where 0 denotes the default // address space or user specified __attribute__((address_space(0))). 
return 0; } - void setAddressSpace(unsigned space) { - assert(space <= MaxAddressSpace); + void setAddressSpace(LangAS space) { + assert((unsigned)space <= MaxAddressSpace); Mask = (Mask & ~AddressSpaceMask) | (((uint32_t) space) << AddressSpaceShift); } - void removeAddressSpace() { setAddressSpace(0); } - void addAddressSpace(unsigned space) { - assert(space); + void removeAddressSpace() { setAddressSpace(LangAS::Default); } + void addAddressSpace(LangAS space) { + assert(space != LangAS::Default); setAddressSpace(space); } @@ -1005,7 +1007,7 @@ } /// Return the address space of this type. - inline unsigned getAddressSpace() const; + inline LangAS getAddressSpace() const; /// Returns gc attribute of this type. inline Qualifiers::GC getObjCGCAttr() const; @@ -1230,7 +1232,7 @@ } bool hasAddressSpace() const { return Quals.hasAddressSpace(); } - unsigned getAddressSpace() const { return Quals.getAddressSpace(); } + LangAS getAddressSpace() const { return Quals.getAddressSpace(); } const Type *getBaseType() const { return BaseType; } @@ -5654,7 +5656,7 @@ } /// Return the address space of this type. -inline unsigned QualType::getAddressSpace() const { +inline LangAS QualType::getAddressSpace() const { return getQualifiers().getAddressSpace(); } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/AddressSpaces.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/AddressSpaces.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/AddressSpaces.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/AddressSpaces.h 2017-10-17 14:41:54.000000000 +0000 @@ -16,14 +16,14 @@ #ifndef LLVM_CLANG_BASIC_ADDRESSSPACES_H #define LLVM_CLANG_BASIC_ADDRESSSPACES_H -namespace clang { +#include -namespace LangAS { +namespace clang { /// \brief Defines the address space values used by the address space qualifier /// of QualType. 
/// -enum ID { +enum class LangAS : unsigned { // The default value 0 is the value used in QualType for the the situation // where there is no address space qualifier. Default = 0, @@ -51,9 +51,24 @@ /// The type of a lookup table which maps from language-specific address spaces /// to target-specific ones. -typedef unsigned Map[FirstTargetAddressSpace]; +typedef unsigned LangASMap[(unsigned)LangAS::FirstTargetAddressSpace]; + +/// \return whether \p AS is a target-specific address space rather than a +/// clang AST address space +inline bool isTargetAddressSpace(LangAS AS) { + return (unsigned)AS >= (unsigned)LangAS::FirstTargetAddressSpace; } +inline unsigned toTargetAddressSpace(LangAS AS) { + assert(isTargetAddressSpace(AS)); + return (unsigned)AS - (unsigned)LangAS::FirstTargetAddressSpace; } +inline LangAS getLangASFromTargetAS(unsigned TargetAS) { + return static_cast((TargetAS) + + (unsigned)LangAS::FirstTargetAddressSpace); +} + +} // namespace clang + #endif diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/AllDiagnostics.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/AllDiagnostics.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/AllDiagnostics.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/AllDiagnostics.h 2017-10-17 14:41:54.000000000 +0000 @@ -25,6 +25,7 @@ #include "clang/Parse/ParseDiagnostic.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Serialization/SerializationDiagnostic.h" +#include "clang/Tooling/Refactoring/RefactoringDiagnostic.h" namespace clang { template diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/AttrDocs.td llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/AttrDocs.td --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/AttrDocs.td 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/AttrDocs.td 
2017-10-17 14:41:54.000000000 +0000 @@ -1290,6 +1290,7 @@ def ARMInterruptDocs : Documentation { let Category = DocCatFunction; + let Heading = "interrupt (ARM)"; let Content = [{ Clang supports the GNU style ``__attribute__((interrupt("TYPE")))`` attribute on ARM targets. This attribute may be attached to a function definition and @@ -1331,6 +1332,7 @@ def MipsInterruptDocs : Documentation { let Category = DocCatFunction; + let Heading = "interrupt (MIPS)"; let Content = [{ Clang supports the GNU style ``__attribute__((interrupt("ARGUMENT")))`` attribute on MIPS targets. This attribute may be attached to a function definition and instructs @@ -1427,6 +1429,7 @@ def AVRInterruptDocs : Documentation { let Category = DocCatFunction; + let Heading = "interrupt (AVR)"; let Content = [{ Clang supports the GNU style ``__attribute__((interrupt))`` attribute on AVR targets. This attribute may be attached to a function definition and instructs @@ -2787,59 +2790,6 @@ }]; } -def AnyX86InterruptDocs : Documentation { - let Category = DocCatFunction; - let Content = [{ -Clang supports the GNU style ``__attribute__((interrupt))`` attribute on -x86/x86-64 targets.The compiler generates function entry and exit sequences -suitable for use in an interrupt handler when this attribute is present. -The 'IRET' instruction, instead of the 'RET' instruction, is used to return -from interrupt or exception handlers. All registers, except for the EFLAGS -register which is restored by the 'IRET' instruction, are preserved by the -compiler. - -Any interruptible-without-stack-switch code must be compiled with --mno-red-zone since interrupt handlers can and will, because of the -hardware design, touch the red zone. - -1. interrupt handler must be declared with a mandatory pointer argument: - - .. 
code-block:: c - - struct interrupt_frame - { - uword_t ip; - uword_t cs; - uword_t flags; - uword_t sp; - uword_t ss; - }; - - __attribute__ ((interrupt)) - void f (struct interrupt_frame *frame) { - ... - } - -2. exception handler: - - The exception handler is very similar to the interrupt handler with - a different mandatory function signature: - - .. code-block:: c - - __attribute__ ((interrupt)) - void f (struct interrupt_frame *frame, uword_t error_code) { - ... - } - - and compiler pops 'ERROR_CODE' off stack before the 'IRET' instruction. - - The exception handler should only be used for exceptions which push an - error code and all other exceptions must use the interrupt handler. - The system will crash if the wrong handler is used. - }]; -} - def AnyX86NoCallerSavedRegistersDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/Attr.td llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/Attr.td --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/Attr.td 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/Attr.td 2017-10-17 14:41:54.000000000 +0000 @@ -2044,7 +2044,7 @@ let Subjects = SubjectList<[HasFunctionProto]>; let ParseKind = "Interrupt"; let HasCustomParsing = 1; - let Documentation = [AnyX86InterruptDocs]; + let Documentation = [Undocumented]; } def AnyX86NoCallerSavedRegisters : InheritableAttr, diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/Builtins.def llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/Builtins.def --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/Builtins.def 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/Builtins.def 2017-10-17 14:41:54.000000000 +0000 @@ -1008,9 +1008,9 @@ LIBBUILTIN(modff, "fff*", "fn", "math.h", ALL_LANGUAGES) 
LIBBUILTIN(modfl, "LdLdLd*", "fn", "math.h", ALL_LANGUAGES) -LIBBUILTIN(nan, "dcC*", "fn", "math.h", ALL_LANGUAGES) -LIBBUILTIN(nanf, "fcC*", "fn", "math.h", ALL_LANGUAGES) -LIBBUILTIN(nanl, "LdcC*", "fn", "math.h", ALL_LANGUAGES) +LIBBUILTIN(nan, "dcC*", "fUn", "math.h", ALL_LANGUAGES) +LIBBUILTIN(nanf, "fcC*", "fUn", "math.h", ALL_LANGUAGES) +LIBBUILTIN(nanl, "LdcC*", "fUn", "math.h", ALL_LANGUAGES) LIBBUILTIN(pow, "ddd", "fne", "math.h", ALL_LANGUAGES) LIBBUILTIN(powf, "fff", "fne", "math.h", ALL_LANGUAGES) @@ -1434,6 +1434,9 @@ BUILTIN(__builtin_os_log_format_buffer_size, "zcC*.", "p:0:nut") BUILTIN(__builtin_os_log_format, "v*v*cC*.", "p:0:nt") +// OpenMP 4.0 +LANGBUILTIN(omp_is_initial_device, "i", "nc", OMP_LANG) + // Builtins for XRay BUILTIN(__xray_customevent, "vcC*z", "") diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/Builtins.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/Builtins.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/Builtins.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/Builtins.h 2017-10-17 14:41:54.000000000 +0000 @@ -38,6 +38,7 @@ MS_LANG = 0x10, // builtin requires MS mode. OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only. OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only. + OMP_LANG = 0x80, // builtin requires OpenMP. ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages. ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode. ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode. 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/CMakeLists.txt llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/CMakeLists.txt --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/CMakeLists.txt 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/CMakeLists.txt 2017-10-17 14:41:54.000000000 +0000 @@ -14,6 +14,7 @@ clang_diag_gen(Frontend) clang_diag_gen(Lex) clang_diag_gen(Parse) +clang_diag_gen(Refactoring) clang_diag_gen(Sema) clang_diag_gen(Serialization) clang_tablegen(DiagnosticGroups.inc -gen-clang-diag-groups diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticDriverKinds.td llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticDriverKinds.td --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticDriverKinds.td 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticDriverKinds.td 2017-10-17 14:41:54.000000000 +0000 @@ -330,4 +330,8 @@ "unable to find a Visual Studio installation; " "try running Clang from a developer command prompt">, InGroup>; + +def warn_drv_fine_grained_bitfield_accesses_ignored : Warning< + "option '-ffine-grained-bitfield-accesses' cannot be enabled together with a sanitizer; flag ignored">, + InGroup; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticGroups.td llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticGroups.td --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticGroups.td 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticGroups.td 2017-10-17 14:41:54.000000000 +0000 @@ -432,13 +432,15 @@ def TautologicalUnsignedZeroCompare : DiagGroup<"tautological-unsigned-zero-compare">; def TautologicalUnsignedEnumZeroCompare : 
DiagGroup<"tautological-unsigned-enum-zero-compare">; def TautologicalOutOfRangeCompare : DiagGroup<"tautological-constant-out-of-range-compare">; +def TautologicalConstantCompare : DiagGroup<"tautological-constant-compare", + [TautologicalUnsignedZeroCompare, + TautologicalUnsignedEnumZeroCompare, + TautologicalOutOfRangeCompare]>; def TautologicalPointerCompare : DiagGroup<"tautological-pointer-compare">; def TautologicalOverlapCompare : DiagGroup<"tautological-overlap-compare">; def TautologicalUndefinedCompare : DiagGroup<"tautological-undefined-compare">; def TautologicalCompare : DiagGroup<"tautological-compare", - [TautologicalUnsignedZeroCompare, - TautologicalUnsignedEnumZeroCompare, - TautologicalOutOfRangeCompare, + [TautologicalConstantCompare, TautologicalPointerCompare, TautologicalOverlapCompare, TautologicalUndefinedCompare]>; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticIDs.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticIDs.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticIDs.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticIDs.h 2017-10-17 14:41:54.000000000 +0000 @@ -38,7 +38,8 @@ DIAG_SIZE_COMMENT = 100, DIAG_SIZE_CROSSTU = 100, DIAG_SIZE_SEMA = 3500, - DIAG_SIZE_ANALYSIS = 100 + DIAG_SIZE_ANALYSIS = 100, + DIAG_SIZE_REFACTORING = 1000, }; // Start position for diagnostics. 
enum { @@ -53,7 +54,8 @@ DIAG_START_CROSSTU = DIAG_START_COMMENT + DIAG_SIZE_CROSSTU, DIAG_START_SEMA = DIAG_START_CROSSTU + DIAG_SIZE_COMMENT, DIAG_START_ANALYSIS = DIAG_START_SEMA + DIAG_SIZE_SEMA, - DIAG_UPPER_LIMIT = DIAG_START_ANALYSIS + DIAG_SIZE_ANALYSIS + DIAG_START_REFACTORING = DIAG_START_ANALYSIS + DIAG_SIZE_ANALYSIS, + DIAG_UPPER_LIMIT = DIAG_START_REFACTORING + DIAG_SIZE_REFACTORING }; class CustomDiagInfo; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticRefactoringKinds.td llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticRefactoringKinds.td --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticRefactoringKinds.td 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticRefactoringKinds.td 2017-10-17 14:41:54.000000000 +0000 @@ -0,0 +1,25 @@ +//==--- DiagnosticRefactoringKinds.td - refactoring diagnostics -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Refactoring Diagnostics +//===----------------------------------------------------------------------===// + +let Component = "Refactoring" in { + +let CategoryName = "Refactoring Invocation Issue" in { + +def err_refactor_no_selection : Error<"refactoring action can't be initiated " + "without a selection">; +def err_refactor_selection_no_symbol : Error<"there is no symbol at the given " + "location">; + +} + +} // end of Refactoring diagnostics diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticSemaKinds.td llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticSemaKinds.td --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/DiagnosticSemaKinds.td 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/DiagnosticSemaKinds.td 2017-10-17 14:41:54.000000000 +0000 @@ -5938,18 +5938,18 @@ "member function %q1 is declared const here|" "%select{|nested }1data member %2 declared const here}0">; -def warn_lunsigned_always_true_comparison : Warning< - "comparison of unsigned expression %0 is always %select{false|true}1">, +def warn_unsigned_always_true_comparison : Warning< + "comparison of %select{%3|unsigned expression}0 %2 " + "%select{unsigned expression|%3}0 is always %select{false|true}4">, InGroup; -def warn_runsigned_always_true_comparison : Warning< - "comparison of %0 unsigned expression is always %select{false|true}1">, - InGroup; -def warn_lunsigned_enum_always_true_comparison : Warning< - "comparison of unsigned enum expression %0 is always %select{false|true}1">, - InGroup; -def warn_runsigned_enum_always_true_comparison : Warning< - "comparison of %0 unsigned enum expression is always %select{false|true}1">, +def warn_unsigned_enum_always_true_comparison : Warning< + 
"comparison of %select{%3|unsigned enum expression}0 %2 " + "%select{unsigned enum expression|%3}0 is always %select{false|true}4">, InGroup; +def warn_tautological_constant_compare : Warning< + "comparison %select{%3|%1}0 %2 " + "%select{%1|%3}0 is always %select{false|true}4">, + InGroup; def warn_mixed_sign_comparison : Warning< "comparison of integers of different signs: %0 and %1">, diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/Diagnostic.td llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/Diagnostic.td --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/Diagnostic.td 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/Diagnostic.td 2017-10-17 14:41:54.000000000 +0000 @@ -138,6 +138,7 @@ include "DiagnosticFrontendKinds.td" include "DiagnosticLexKinds.td" include "DiagnosticParseKinds.td" +include "DiagnosticRefactoringKinds.td" include "DiagnosticSemaKinds.td" include "DiagnosticSerializationKinds.td" diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/TargetInfo.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/TargetInfo.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Basic/TargetInfo.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Basic/TargetInfo.h 2017-10-17 14:41:54.000000000 +0000 @@ -86,7 +86,7 @@ *LongDoubleFormat, *Float128Format; unsigned char RegParmMax, SSERegParmMax; TargetCXXABI TheCXXABI; - const LangAS::Map *AddrSpaceMap; + const LangASMap *AddrSpaceMap; mutable StringRef PlatformName; mutable VersionTuple PlatformMinVersion; @@ -322,9 +322,7 @@ /// \brief Get integer value for null pointer. /// \param AddrSpace address space of pointee in source language. 
- virtual uint64_t getNullPointerValue(unsigned AddrSpace) const { - return 0; - } + virtual uint64_t getNullPointerValue(LangAS AddrSpace) const { return 0; } /// \brief Return the size of '_Bool' and C++ 'bool' for this target, in bits. unsigned getBoolWidth() const { return BoolWidth; } @@ -971,15 +969,13 @@ return nullptr; } - const LangAS::Map &getAddressSpaceMap() const { - return *AddrSpaceMap; - } + const LangASMap &getAddressSpaceMap() const { return *AddrSpaceMap; } /// \brief Return an AST address space which can be used opportunistically /// for constant global memory. It must be possible to convert pointers into /// this address space to LangAS::Default. If no such address space exists, /// this may return None, and such optimizations will be disabled. - virtual llvm::Optional getConstantAddressSpace() const { + virtual llvm::Optional getConstantAddressSpace() const { return LangAS::Default; } @@ -1058,7 +1054,7 @@ } /// \brief Get address space for OpenCL type. - virtual LangAS::ID getOpenCLTypeAddrSpace(const Type *T) const; + virtual LangAS getOpenCLTypeAddrSpace(const Type *T) const; /// \returns Target specific vtbl ptr address space. virtual unsigned getVtblPtrAddressSpace() const { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Config/config.h.cmake llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Config/config.h.cmake --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Config/config.h.cmake 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Config/config.h.cmake 2017-10-17 14:41:54.000000000 +0000 @@ -20,6 +20,9 @@ /* Default OpenMP runtime used by -fopenmp. */ #define CLANG_DEFAULT_OPENMP_RUNTIME "${CLANG_DEFAULT_OPENMP_RUNTIME}" +/* Default architecture for OpenMP offloading to Nvidia GPUs. */ +#define CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}" + /* Multilib suffix for libdir. 
*/ #define CLANG_LIBDIR_SUFFIX "${CLANG_LIBDIR_SUFFIX}" diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Driver/Options.td llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Driver/Options.td --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Driver/Options.td 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Driver/Options.td 2017-10-17 14:41:54.000000000 +0000 @@ -1045,6 +1045,13 @@ Group, Flags<[CC1Option]>, HelpText<"Filename defining the whitelist for imbuing the 'never instrument' XRay attribute.">; +def ffine_grained_bitfield_accesses : Flag<["-"], + "ffine-grained-bitfield-accesses">, Group, Flags<[CC1Option]>, + HelpText<"Use separate accesses for bitfields with legal widths and alignments.">; +def fno_fine_grained_bitfield_accesses : Flag<["-"], + "fno-fine-grained-bitfield-accesses">, Group, Flags<[CC1Option]>, + HelpText<"Use large-integer access for consecutive bitfield runs.">; + def flat__namespace : Flag<["-"], "flat_namespace">; def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group; def flimited_precision_EQ : Joined<["-"], "flimited-precision=">, Group; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Driver/SanitizerArgs.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Driver/SanitizerArgs.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Driver/SanitizerArgs.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Driver/SanitizerArgs.h 2017-10-17 14:41:54.000000000 +0000 @@ -44,6 +44,8 @@ bool TsanFuncEntryExit = true; bool TsanAtomics = true; bool MinimalRuntime = false; + // True if cross-dso CFI support is provided by the system (i.e. Android). + bool ImplicitCfiRuntime = false; public: /// Parses the sanitizer arguments from an argument list. 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Frontend/CodeGenOptions.def llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Frontend/CodeGenOptions.def --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Frontend/CodeGenOptions.def 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Frontend/CodeGenOptions.def 2017-10-17 14:41:54.000000000 +0000 @@ -179,6 +179,7 @@ CODEGENOPT(SanitizeStats , 1, 0) ///< Collect statistics for sanitizers. CODEGENOPT(SimplifyLibCalls , 1, 1) ///< Set when -fbuiltin is enabled. CODEGENOPT(SoftFloat , 1, 0) ///< -soft-float. +CODEGENOPT(FineGrainedBitfieldAccesses, 1, 0) ///< Enable fine-grained bitfield accesses. CODEGENOPT(StrictEnums , 1, 0) ///< Optimize based on strict enum definition. CODEGENOPT(StrictVTablePointers, 1, 0) ///< Optimize based on the strict vtable pointers CODEGENOPT(TimePasses , 1, 0) ///< Set when -ftime-report is enabled. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Frontend/CommandLineSourceLoc.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Frontend/CommandLineSourceLoc.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Frontend/CommandLineSourceLoc.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Frontend/CommandLineSourceLoc.h 2017-10-17 14:41:54.000000000 +0000 @@ -51,6 +51,52 @@ } }; +/// A source range that has been parsed on the command line. +struct ParsedSourceRange { + std::string FileName; + /// The starting location of the range. The first element is the line and + /// the second element is the column. + std::pair Begin; + /// The ending location of the range. The first element is the line and the + /// second element is the column. + std::pair End; + + /// Returns a parsed source range from a string or None if the string is + /// invalid. 
+ /// + /// The source string has the following format: + /// + /// file:start_line:start_column[-end_line:end_column] + /// + /// If the end line and column are omitted, the starting line and columns + /// are used as the end values. + static Optional fromString(StringRef Str) { + std::pair RangeSplit = Str.rsplit('-'); + unsigned EndLine, EndColumn; + bool HasEndLoc = false; + if (!RangeSplit.second.empty()) { + std::pair Split = RangeSplit.second.rsplit(':'); + if (Split.first.getAsInteger(10, EndLine) || + Split.second.getAsInteger(10, EndColumn)) { + // The string does not end in end_line:end_column, so the '-' + // probably belongs to the filename which means the whole + // string should be parsed. + RangeSplit.first = Str; + } else + HasEndLoc = true; + } + auto Begin = ParsedSourceLocation::FromString(RangeSplit.first); + if (Begin.FileName.empty()) + return None; + if (!HasEndLoc) { + EndLine = Begin.Line; + EndColumn = Begin.Column; + } + return ParsedSourceRange{std::move(Begin.FileName), + {Begin.Line, Begin.Column}, + {EndLine, EndColumn}}; + } +}; } namespace llvm { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/module.modulemap llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/module.modulemap --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/module.modulemap 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/module.modulemap 2017-10-17 14:41:54.000000000 +0000 @@ -71,6 +71,7 @@ module Parse { header "Parse/ParseDiagnostic.h" export * } module Sema { header "Sema/SemaDiagnostic.h" export * } module Serialization { header "Serialization/SerializationDiagnostic.h" export * } + module Refactoring { header "Tooling/Refactoring/RefactoringDiagnostic.h" export * } } module Clang_Driver { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h 
llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h 2017-10-17 14:41:54.000000000 +0000 @@ -11,6 +11,7 @@ #define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_ACTION_RULE_REQUIREMENTS_H #include "clang/Basic/LLVM.h" +#include "clang/Tooling/Refactoring/RefactoringDiagnostic.h" #include "clang/Tooling/Refactoring/RefactoringOption.h" #include "clang/Tooling/Refactoring/RefactoringRuleContext.h" #include "llvm/Support/Error.h" @@ -47,10 +48,7 @@ Expected evaluate(RefactoringRuleContext &Context) const { if (Context.getSelectionRange().isValid()) return Context.getSelectionRange(); - // FIXME: Use a diagnostic. - return llvm::make_error( - "refactoring action can't be initiated without a selection", - llvm::inconvertibleErrorCode()); + return Context.createDiagnosticError(diag::err_refactor_no_selection); } }; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Tooling/Refactoring/RefactoringDiagnostic.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Tooling/Refactoring/RefactoringDiagnostic.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Tooling/Refactoring/RefactoringDiagnostic.h 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Tooling/Refactoring/RefactoringDiagnostic.h 2017-10-17 14:41:54.000000000 +0000 @@ -0,0 +1,30 @@ +//===--- RefactoringDiagnostic.h - ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGDIAGNOSTIC_H +#define LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGDIAGNOSTIC_H + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/PartialDiagnostic.h" + +namespace clang { +namespace diag { +enum { +#define DIAG(ENUM, FLAGS, DEFAULT_MAPPING, DESC, GROUP, SFINAE, NOWERROR, \ + SHOWINSYSHEADER, CATEGORY) \ + ENUM, +#define REFACTORINGSTART +#include "clang/Basic/DiagnosticRefactoringKinds.inc" +#undef DIAG + NUM_BUILTIN_REFACTORING_DIAGNOSTICS +}; +} // end namespace diag +} // end namespace clang + +#endif // LLVM_CLANG_TOOLING_REFACTORING_REFACTORINGDIAGNOSTIC_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h 2017-10-17 14:41:54.000000000 +0000 @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_RULE_CONTEXT_H #define LLVM_CLANG_TOOLING_REFACTOR_REFACTORING_RULE_CONTEXT_H +#include "clang/Basic/DiagnosticError.h" #include "clang/Basic/SourceManager.h" namespace clang { @@ -50,6 +51,17 @@ void setASTContext(ASTContext &Context) { AST = &Context; } + /// Creates an llvm::Error value that contains a diagnostic. + /// + /// The errors should not outlive the context. 
+ llvm::Error createDiagnosticError(SourceLocation Loc, unsigned DiagID) { + return DiagnosticError::create(Loc, PartialDiagnostic(DiagID, DiagStorage)); + } + + llvm::Error createDiagnosticError(unsigned DiagID) { + return createDiagnosticError(SourceLocation(), DiagID); + } + private: /// The source manager for the translation unit / file on which a refactoring /// action might operate on. @@ -60,6 +72,8 @@ /// An optional AST for the translation unit on which a refactoring action /// might operate on. ASTContext *AST = nullptr; + /// The allocator for diagnostics. + PartialDiagnostic::StorageAllocator DiagStorage; }; } // end namespace tooling diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/ASTContext.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/ASTContext.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/ASTContext.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/ASTContext.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -697,8 +697,8 @@ llvm_unreachable("Invalid CXXABI type!"); } -static const LangAS::Map *getAddressSpaceMap(const TargetInfo &T, - const LangOptions &LOpts) { +static const LangASMap *getAddressSpaceMap(const TargetInfo &T, + const LangOptions &LOpts) { if (LOpts.FakeAddressSpaceMap) { // The fake address space map must have a distinct entry for each // language-specific address space. 
@@ -2283,8 +2283,8 @@ return QualType(eq, fastQuals); } -QualType -ASTContext::getAddrSpaceQualType(QualType T, unsigned AddressSpace) const { +QualType ASTContext::getAddrSpaceQualType(QualType T, + LangAS AddressSpace) const { QualType CanT = getCanonicalType(T); if (CanT.getAddressSpace() == AddressSpace) return T; @@ -8870,8 +8870,8 @@ char *End; unsigned AddrSpace = strtoul(Str, &End, 10); if (End != Str && AddrSpace != 0) { - Type = Context.getAddrSpaceQualType( - Type, AddrSpace + LangAS::FirstTargetAddressSpace); + Type = Context.getAddrSpaceQualType(Type, + getLangASFromTargetAS(AddrSpace)); Str = End; } if (c == '*') @@ -9694,20 +9694,20 @@ } uint64_t ASTContext::getTargetNullPointerValue(QualType QT) const { - unsigned AS; + LangAS AS; if (QT->getUnqualifiedDesugaredType()->isNullPtrType()) - AS = 0; + AS = LangAS::Default; else AS = QT->getPointeeType().getAddressSpace(); return getTargetInfo().getNullPointerValue(AS); } -unsigned ASTContext::getTargetAddressSpace(unsigned AS) const { - if (AS >= LangAS::FirstTargetAddressSpace) - return AS - LangAS::FirstTargetAddressSpace; +unsigned ASTContext::getTargetAddressSpace(LangAS AS) const { + if (isTargetAddressSpace(AS)) + return toTargetAddressSpace(AS); else - return (*AddrSpaceMap)[AS]; + return (*AddrSpaceMap)[(unsigned)AS]; } // Explicitly instantiate this in case a Redeclarable is used from a TU that diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/ExprConstant.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/ExprConstant.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/ExprConstant.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/ExprConstant.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -7929,6 +7929,9 @@ return BuiltinOp == Builtin::BI__atomic_always_lock_free ? Success(0, E) : Error(E); } + case Builtin::BIomp_is_initial_device: + // We can decide statically which value the runtime would return if called. 
+ return Success(Info.getLangOpts().OpenMPIsDevice ? 0 : 1, E); } } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/ItaniumMangle.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/ItaniumMangle.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/ItaniumMangle.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/ItaniumMangle.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -2222,7 +2222,7 @@ // ::= U SmallString<64> ASString; - unsigned AS = Quals.getAddressSpace(); + LangAS AS = Quals.getAddressSpace(); if (Context.getASTContext().addressSpaceMapManglingFor(AS)) { // ::= "AS" diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/RecordLayoutBuilder.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/RecordLayoutBuilder.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/RecordLayoutBuilder.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/RecordLayoutBuilder.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -1731,7 +1731,7 @@ const ArrayType* ATy = Context.getAsArrayType(D->getType()); FieldAlign = Context.getTypeAlignInChars(ATy->getElementType()); } else if (const ReferenceType *RT = D->getType()->getAs()) { - unsigned AS = RT->getPointeeType().getAddressSpace(); + unsigned AS = Context.getTargetAddressSpace(RT->getPointeeType()); FieldSize = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(AS)); FieldAlign = diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/TypePrinter.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/TypePrinter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/AST/TypePrinter.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/AST/TypePrinter.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -1320,7 +1320,9 @@ default: llvm_unreachable("This attribute should have been handled already"); case AttributedType::attr_address_space: OS << 
"address_space("; - OS << T->getEquivalentType().getAddressSpace(); + // FIXME: printing the raw LangAS value is wrong. This should probably + // use the same code as Qualifiers::print() + OS << (unsigned)T->getEquivalentType().getAddressSpace(); OS << ')'; break; @@ -1645,7 +1647,7 @@ if (getCVRQualifiers()) return false; - if (getAddressSpace()) + if (getAddressSpace() != LangAS::Default) return false; if (getObjCGCAttr()) @@ -1676,7 +1678,8 @@ OS << "__unaligned"; addSpace = true; } - if (unsigned addrspace = getAddressSpace()) { + LangAS addrspace = getAddressSpace(); + if (addrspace != LangAS::Default) { if (addrspace != LangAS::opencl_private) { if (addSpace) OS << ' '; @@ -1704,9 +1707,8 @@ OS << "__shared"; break; default: - assert(addrspace >= LangAS::FirstTargetAddressSpace); OS << "__attribute__((address_space("; - OS << addrspace - LangAS::FirstTargetAddressSpace; + OS << toTargetAddressSpace(addrspace); OS << ")))"; } } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/Builtins.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/Builtins.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/Builtins.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/Builtins.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -75,8 +75,9 @@ (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; bool OclCUnsupported = !LangOpts.OpenCL && (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); + bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && - !OclC1Unsupported && !OclC2Unsupported && + !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/DiagnosticIDs.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/DiagnosticIDs.cpp --- 
llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/DiagnosticIDs.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/DiagnosticIDs.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -43,7 +43,7 @@ unsigned SFINAE : 2; unsigned WarnNoWerror : 1; unsigned WarnShowInSystemHeader : 1; - unsigned Category : 5; + unsigned Category : 6; uint16_t OptionGroupIndex; @@ -88,6 +88,7 @@ VALIDATE_DIAG_SIZE(COMMENT) VALIDATE_DIAG_SIZE(SEMA) VALIDATE_DIAG_SIZE(ANALYSIS) +VALIDATE_DIAG_SIZE(REFACTORING) #undef VALIDATE_DIAG_SIZE #undef STRINGIFY_NAME @@ -112,6 +113,7 @@ #include "clang/Basic/DiagnosticCrossTUKinds.inc" #include "clang/Basic/DiagnosticSemaKinds.inc" #include "clang/Basic/DiagnosticAnalysisKinds.inc" +#include "clang/Basic/DiagnosticRefactoringKinds.inc" #undef DIAG }; @@ -150,6 +152,7 @@ CATEGORY(CROSSTU, COMMENT) CATEGORY(SEMA, CROSSTU) CATEGORY(ANALYSIS, SEMA) +CATEGORY(REFACTORING, ANALYSIS) #undef CATEGORY // Avoid out of bounds reads. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/TargetInfo.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/TargetInfo.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/TargetInfo.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/TargetInfo.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -23,7 +23,7 @@ #include using namespace clang; -static const LangAS::Map DefaultAddrSpaceMap = { 0 }; +static const LangASMap DefaultAddrSpaceMap = {0}; // TargetInfo Constructor. 
TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) { @@ -356,7 +356,7 @@ return true; } -LangAS::ID TargetInfo::getOpenCLTypeAddrSpace(const Type *T) const { +LangAS TargetInfo::getOpenCLTypeAddrSpace(const Type *T) const { auto BT = dyn_cast(T); if (!BT) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/Targets/AMDGPU.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/Targets/AMDGPU.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/Targets/AMDGPU.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/Targets/AMDGPU.cpp 2017-10-17 14:41:54.000000000 +0000 @@ -42,7 +42,7 @@ "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; -static const LangAS::Map AMDGPUPrivIsZeroDefIsGenMap = { +static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = { 4, // Default 1, // opencl_global 3, // opencl_local @@ -54,7 +54,7 @@ 3 // cuda_shared }; -static const LangAS::Map AMDGPUGenIsZeroDefIsGenMap = { +static const LangASMap AMDGPUGenIsZeroDefIsGenMap = { 0, // Default 1, // opencl_global 3, // opencl_local @@ -66,7 +66,7 @@ 3 // cuda_shared }; -static const LangAS::Map AMDGPUPrivIsZeroDefIsPrivMap = { +static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = { 0, // Default 1, // opencl_global 3, // opencl_local @@ -78,7 +78,7 @@ 3 // cuda_shared }; -static const LangAS::Map AMDGPUGenIsZeroDefIsPrivMap = { +static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = { 5, // Default 1, // opencl_global 3, // opencl_local diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/Targets/AMDGPU.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/Targets/AMDGPU.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Basic/Targets/AMDGPU.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Basic/Targets/AMDGPU.h 2017-10-17 14:41:54.000000000 +0000 @@ -258,7 +258,7 @@ } } - LangAS::ID 
getOpenCLTypeAddrSpace(const Type *T) const override { + LangAS getOpenCLTypeAddrSpace(const Type *T) const override { auto BT = dyn_cast(T); if (!BT) @@ -279,8 +279,8 @@ } } - llvm::Optional getConstantAddressSpace() const override { - return LangAS::FirstTargetAddressSpace + AS.Constant; + llvm::Optional getConstantAddressSpace() const override { + return getLangASFromTargetAS(AS.Constant); } /// \returns Target specific vtbl ptr address space. @@ -318,7 +318,7 @@ // In amdgcn target the null pointer in global, constant, and generic // address space has value 0 but in private and local address space has // value ~0. - uint64_t getNullPointerValue(unsigned AS) const override { + uint64_t getNullPointerValue(LangAS AS) const override { return AS == LangAS::opencl_local ? ~0 : 0; } }; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGAtomic.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGAtomic.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGAtomic.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGAtomic.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -96,9 +96,8 @@ BFI.StorageSize = AtomicSizeInBits; BFI.StorageOffset += OffsetInChars; LVal = LValue::MakeBitfield(Address(Addr, lvalue.getAlignment()), - BFI, lvalue.getType(), - lvalue.getBaseInfo()); - LVal.setTBAAInfo(lvalue.getTBAAInfo()); + BFI, lvalue.getType(), lvalue.getBaseInfo(), + lvalue.getTBAAInfo()); AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned); if (AtomicTy.isNull()) { llvm::APInt Size( @@ -1227,7 +1226,8 @@ return RValue::get(nullptr); return convertTempToRValue( - Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()), + Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo( + Dest.getAddressSpace())), RValTy, E->getExprLoc()); } @@ -1299,7 +1299,8 @@ assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits()); return convertTempToRValue( - 
Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()), + Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo( + Dest.getAddressSpace())), RValTy, E->getExprLoc()); } @@ -1346,15 +1347,15 @@ if (LVal.isBitField()) return CGF.EmitLoadOfBitfieldLValue( LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(), - LVal.getBaseInfo()), loc); + LVal.getBaseInfo(), TBAAAccessInfo()), loc); if (LVal.isVectorElt()) return CGF.EmitLoadOfLValue( LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(), - LVal.getBaseInfo()), loc); + LVal.getBaseInfo(), TBAAAccessInfo()), loc); assert(LVal.isExtVectorElt()); return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt( addr, LVal.getExtVectorElts(), LVal.getType(), - LVal.getBaseInfo())); + LVal.getBaseInfo(), TBAAAccessInfo())); } RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal, @@ -1670,29 +1671,30 @@ UpdateLVal = LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(), AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), - AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getType(), AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); } else if (AtomicLVal.isVectorElt()) { UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); DesiredLVal = LValue::MakeVectorElt( DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } else { assert(AtomicLVal.isExtVectorElt()); UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); DesiredLVal = LValue::MakeExtVectorElt( DesiredAddr, 
AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } - UpdateLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); - DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation()); } // Store new value in the corresponding memory area @@ -1775,20 +1777,19 @@ if (AtomicLVal.isBitField()) { DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), - AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getType(), AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); } else if (AtomicLVal.isVectorElt()) { DesiredLVal = LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(), - AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getType(), AtomicLVal.getBaseInfo(), + AtomicLVal.getTBAAInfo()); } else { assert(AtomicLVal.isExtVectorElt()); DesiredLVal = LValue::MakeExtVectorElt( DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), - AtomicLVal.getBaseInfo()); + AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } - DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo()); // Store new value in the corresponding memory area assert(UpdateRVal.isScalar()); CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGBlocks.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGBlocks.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGBlocks.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGBlocks.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -309,10 +309,12 @@ if (CGM.getLangOpts().OpenCL) { // The header is basically 'struct { int; int; generic void *; // custom_fields; }'. Assert that struct is packed. 
- auto GenPtrAlign = CharUnits::fromQuantity( - CGM.getTarget().getPointerAlign(LangAS::opencl_generic) / 8); - auto GenPtrSize = CharUnits::fromQuantity( - CGM.getTarget().getPointerWidth(LangAS::opencl_generic) / 8); + auto GenericAS = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic); + auto GenPtrAlign = + CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8); + auto GenPtrSize = + CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8); assert(CGM.getIntSize() <= GenPtrSize); assert(CGM.getIntAlign() <= GenPtrAlign); assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); @@ -775,9 +777,11 @@ bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; auto GenVoidPtrTy = IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; - unsigned GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default; + LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default; auto GenVoidPtrSize = CharUnits::fromQuantity( - CGM.getTarget().getPointerWidth(GenVoidPtrAddr) / 8); + CGM.getTarget().getPointerWidth( + CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) / + 8); // Using the computed layout, generate the actual block function. 
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); auto *InvokeFn = CodeGenFunction(CGM, true).GenerateBlockFunction( diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGDecl.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGDecl.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGDecl.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGDecl.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -222,7 +222,7 @@ Name = getStaticDeclName(*this, D); llvm::Type *LTy = getTypes().ConvertTypeForMem(Ty); - unsigned AS = GetGlobalVarAddressSpace(&D); + LangAS AS = GetGlobalVarAddressSpace(&D); unsigned TargetAS = getContext().getTargetAddressSpace(AS); // Local address space cannot have an initializer. @@ -252,7 +252,7 @@ } // Make sure the result is of the correct type. - unsigned ExpectedAS = Ty.getAddressSpace(); + LangAS ExpectedAS = Ty.getAddressSpace(); llvm::Constant *Addr = GV; if (AS != ExpectedAS) { Addr = getTargetCodeGenInfo().performAddrSpaceCast( diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGExprConstant.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGExprConstant.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGExprConstant.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGExprConstant.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -612,7 +612,7 @@ CGM.getAddrOfConstantCompoundLiteralIfEmitted(E)) return ConstantAddress(Addr, Align); - unsigned addressSpace = E->getType().getAddressSpace(); + LangAS addressSpace = E->getType().getAddressSpace(); ConstantEmitter emitter(CGM, CGF); llvm::Constant *C = emitter.tryEmitForInitializer(E->getInitializer(), @@ -725,8 +725,8 @@ case CK_AddressSpaceConversion: { auto C = Emitter.tryEmitPrivate(subExpr, subExpr->getType()); if (!C) return nullptr; - unsigned destAS = E->getType()->getPointeeType().getAddressSpace(); - 
unsigned srcAS = subExpr->getType()->getPointeeType().getAddressSpace(); + LangAS destAS = E->getType()->getPointeeType().getAddressSpace(); + LangAS srcAS = subExpr->getType()->getPointeeType().getAddressSpace(); llvm::Type *destTy = ConvertType(E->getType()); return CGM.getTargetCodeGenInfo().performAddrSpaceCast(CGM, C, srcAS, destAS, destTy); @@ -1184,14 +1184,14 @@ } llvm::Constant *ConstantEmitter::tryEmitForInitializer(const Expr *E, - unsigned destAddrSpace, + LangAS destAddrSpace, QualType destType) { initializeNonAbstract(destAddrSpace); return markIfFailed(tryEmitPrivateForMemory(E, destType)); } llvm::Constant *ConstantEmitter::emitForInitializer(const APValue &value, - unsigned destAddrSpace, + LangAS destAddrSpace, QualType destType) { initializeNonAbstract(destAddrSpace); auto C = tryEmitPrivateForMemory(value, destType); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGExpr.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGExpr.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGExpr.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGExpr.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -916,7 +916,8 @@ /// EmitPointerWithAlignment - Given an expression of pointer type, try to /// derive a more accurate bound on the alignment of the pointer. Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, - LValueBaseInfo *BaseInfo) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { // We allow this with ObjC object pointers because of fragile ABIs. 
assert(E->getType()->isPointerType() || E->getType()->isObjCObjectPointerType()); @@ -936,20 +937,30 @@ if (PtrTy->getPointeeType()->isVoidType()) break; - LValueBaseInfo InnerInfo; - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerInfo); - if (BaseInfo) *BaseInfo = InnerInfo; - - // If this is an explicit bitcast, and the source l-value is - // opaque, honor the alignment of the casted-to type. - if (isa(CE) && - InnerInfo.getAlignmentSource() != AlignmentSource::Decl) { - LValueBaseInfo ExpInfo; + LValueBaseInfo InnerBaseInfo; + TBAAAccessInfo InnerTBAAInfo; + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), + &InnerBaseInfo, + &InnerTBAAInfo); + if (BaseInfo) *BaseInfo = InnerBaseInfo; + if (TBAAInfo) *TBAAInfo = InnerTBAAInfo; + + if (isa(CE)) { + LValueBaseInfo TargetTypeBaseInfo; + TBAAAccessInfo TargetTypeTBAAInfo; CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), - &ExpInfo); - if (BaseInfo) - BaseInfo->mergeForCast(ExpInfo); - Addr = Address(Addr.getPointer(), Align); + &TargetTypeBaseInfo, + &TargetTypeTBAAInfo); + if (TBAAInfo) + *TBAAInfo = CGM.mergeTBAAInfoForCast(*TBAAInfo, + TargetTypeTBAAInfo); + // If the source l-value is opaque, honor the alignment of the + // casted-to type. + if (InnerBaseInfo.getAlignmentSource() != AlignmentSource::Decl) { + if (BaseInfo) + BaseInfo->mergeForCast(TargetTypeBaseInfo); + Addr = Address(Addr.getPointer(), Align); + } } if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) && @@ -969,12 +980,13 @@ // Array-to-pointer decay. case CK_ArrayToPointerDecay: - return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo); + return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo, TBAAInfo); // Derived-to-base conversions. 
case CK_UncheckedDerivedToBase: case CK_DerivedToBase: { - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo); + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo, + TBAAInfo); auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); return GetAddressOfBaseClass(Addr, Derived, CE->path_begin(), CE->path_end(), @@ -994,6 +1006,7 @@ if (UO->getOpcode() == UO_AddrOf) { LValue LV = EmitLValue(UO->getSubExpr()); if (BaseInfo) *BaseInfo = LV.getBaseInfo(); + if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo(); return LV.getAddress(); } } @@ -1001,7 +1014,8 @@ // TODO: conditional operators, comma. // Otherwise, use the alignment of the type. - CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo); + CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo, + TBAAInfo); return Address(EmitScalarExpr(E), Align); } @@ -2447,8 +2461,10 @@ assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type"); LValueBaseInfo BaseInfo; - Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo); - LValue LV = MakeAddrLValue(Addr, T, BaseInfo, CGM.getTBAAAccessInfo(T)); + TBAAAccessInfo TBAAInfo; + Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo, + &TBAAInfo); + LValue LV = MakeAddrLValue(Addr, T, BaseInfo, TBAAInfo); LV.getQuals().setAddressSpace(ExprTy.getAddressSpace()); // We should not generate __weak write barrier on indirect reference @@ -3048,7 +3064,8 @@ } Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, - LValueBaseInfo *BaseInfo) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { assert(E->getType()->isArrayType() && "Array to pointer decay must have array source type!"); @@ -3056,6 +3073,7 @@ LValue LV = EmitLValue(E); Address Addr = LV.getAddress(); if (BaseInfo) *BaseInfo = LV.getBaseInfo(); + if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo(); // If the array type was an incomplete type, we need to make sure // the decay ends up being the right 
type. @@ -3195,9 +3213,8 @@ LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); - return LValue::MakeVectorElt(LHS.getAddress(), Idx, - E->getBase()->getType(), - LHS.getBaseInfo()); + return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(), + LHS.getBaseInfo(), TBAAAccessInfo()); } // All the other cases basically behave like simple offsetting. @@ -3216,13 +3233,14 @@ } LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; Address Addr = Address::invalid(); if (const VariableArrayType *vla = getContext().getAsVariableArrayType(E->getType())) { // The base must be a pointer, which is not an aggregate. Emit // it. It needs to be emitted first in case it's what captures // the VLA bounds. - Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); + Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); // The element count here is the total number of non-VLA elements. @@ -3246,7 +3264,7 @@ // Indexing over an interface, as in "NSString *P; P[4];" // Emit the base pointer. - Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); + Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT); @@ -3294,19 +3312,17 @@ E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); + TBAAInfo = CGM.getTBAAAccessInfo(E->getType()); } else { // The base must be a pointer; emit it with an estimate of its alignment. 
- Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); + Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, E->getExprLoc()); } - LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo, - CGM.getTBAAAccessInfo(E->getType())); - - // TODO: Preserve/extend path TBAA metadata? + LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo, TBAAInfo); if (getLangOpts().ObjC1 && getLangOpts().getGC() != LangOptions::NonGC) { @@ -3321,8 +3337,6 @@ TBAAAccessInfo &TBAAInfo, QualType BaseTy, QualType ElTy, bool IsLowerBound) { - TBAAInfo = CGF.CGM.getTBAAAccessInfo(ElTy); - LValue BaseLVal; if (auto *ASE = dyn_cast(Base->IgnoreParenImpCasts())) { BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); @@ -3352,7 +3366,7 @@ BaseInfo.mergeForCast(TypeInfo); return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); } - return CGF.EmitPointerWithAlignment(Base, &BaseInfo); + return CGF.EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); } LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, @@ -3518,10 +3532,10 @@ // If it is a pointer to a vector, emit the address and form an lvalue with // it. 
LValueBaseInfo BaseInfo; - Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo); const PointerType *PT = E->getBase()->getType()->getAs(); - Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo, - CGM.getTBAAAccessInfo(PT->getPointeeType())); + Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo, TBAAInfo); Base.getQuals().removeObjCGCAttr(); } else if (E->getBase()->isGLValue()) { // Otherwise, if the base is an lvalue ( as in the case of foo.x.x), @@ -3552,7 +3566,7 @@ llvm::Constant *CV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); return LValue::MakeExtVectorElt(Base.getAddress(), CV, type, - Base.getBaseInfo()); + Base.getBaseInfo(), TBAAAccessInfo()); } assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!"); @@ -3563,7 +3577,7 @@ CElts.push_back(BaseElts->getAggregateElement(Indices[i])); llvm::Constant *CV = llvm::ConstantVector::get(CElts); return LValue::MakeExtVectorElt(Base.getExtVectorAddress(), CV, type, - Base.getBaseInfo()); + Base.getBaseInfo(), TBAAAccessInfo()); } LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { @@ -3577,7 +3591,8 @@ LValue BaseLV; if (E->isArrow()) { LValueBaseInfo BaseInfo; - Address Addr = EmitPointerWithAlignment(BaseExpr, &BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Addr = EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); QualType PtrTy = BaseExpr->getType()->getPointeeType(); SanitizerSet SkippedChecks; bool IsBaseCXXThis = IsWrappedCXXThis(BaseExpr); @@ -3587,8 +3602,7 @@ SkippedChecks.set(SanitizerKind::Null, true); EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy, /*Alignment=*/CharUnits::Zero(), SkippedChecks); - BaseLV = MakeAddrLValue(Addr, PtrTy, BaseInfo, - CGM.getTBAAAccessInfo(PtrTy)); + BaseLV = MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); } else BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess); 
@@ -3665,15 +3679,6 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field) { LValueBaseInfo BaseInfo = base.getBaseInfo(); - AlignmentSource fieldAlignSource = - getFieldAlignmentSource(BaseInfo.getAlignmentSource()); - LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias()); - - QualType type = field->getType(); - const RecordDecl *rec = field->getParent(); - if (rec->isUnion() || rec->hasAttr() || type->isVectorType()) - FieldBaseInfo.setMayAlias(true); - bool mayAlias = FieldBaseInfo.getMayAlias(); if (field->isBitField()) { const CGRecordLayout &RL = @@ -3693,19 +3698,55 @@ QualType fieldType = field->getType().withCVRQualifiers(base.getVRQualifiers()); - return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo); + // TODO: Support TBAA for bit fields. + LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource(), false); + return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo, + TBAAAccessInfo()); + } + + // Fields of may-alias structures are may-alias themselves. + // FIXME: this should get propagated down through anonymous structs + // and unions. + QualType FieldType = field->getType(); + const RecordDecl *rec = field->getParent(); + AlignmentSource BaseAlignSource = BaseInfo.getAlignmentSource(); + LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource), false); + TBAAAccessInfo FieldTBAAInfo; + if (BaseInfo.getMayAlias() || rec->hasAttr() || + FieldType->isVectorType()) { + FieldBaseInfo.setMayAlias(true); + FieldTBAAInfo = CGM.getTBAAMayAliasAccessInfo(); + } else if (rec->isUnion()) { + // TODO: Support TBAA for unions. + FieldBaseInfo.setMayAlias(true); + FieldTBAAInfo = CGM.getTBAAMayAliasAccessInfo(); + } else { + // If no base type been assigned for the base access, then try to generate + // one for this base lvalue. 
+ FieldTBAAInfo = base.getTBAAInfo(); + if (!FieldTBAAInfo.BaseType) { + FieldTBAAInfo.BaseType = CGM.getTBAABaseTypeInfo(base.getType()); + assert(!FieldTBAAInfo.Offset && + "Nonzero offset for an access with no base type!"); + } + + // Adjust offset to be relative to the base type. + const ASTRecordLayout &Layout = + getContext().getASTRecordLayout(field->getParent()); + unsigned CharWidth = getContext().getCharWidth(); + if (FieldTBAAInfo.BaseType) + FieldTBAAInfo.Offset += + Layout.getFieldOffset(field->getFieldIndex()) / CharWidth; + + // Update the final access type. + FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType); } Address addr = base.getAddress(); unsigned cvr = base.getVRQualifiers(); - bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA; if (rec->isUnion()) { // For unions, there is no pointer adjustment. - assert(!type->isReferenceType() && "union has reference member"); - // TODO: handle path-aware TBAA for union. - TBAAPath = false; - - const auto FieldType = field->getType(); + assert(!FieldType->isReferenceType() && "union has reference member"); if (CGM.getCodeGenOpts().StrictVTablePointers && hasAnyVptr(FieldType, getContext())) // Because unions can easily skip invariant.barriers, we need to add @@ -3717,24 +3758,17 @@ addr = emitAddrOfFieldStorage(*this, addr, field); // If this is a reference field, load the reference right now. - if (const ReferenceType *refType = type->getAs()) { + if (const ReferenceType *refType = FieldType->getAs()) { llvm::LoadInst *load = Builder.CreateLoad(addr, "ref"); if (cvr & Qualifiers::Volatile) load->setVolatile(true); - // Loading the reference will disable path-aware TBAA. - TBAAPath = false; - TBAAAccessInfo TBAAInfo = mayAlias ? 
CGM.getTBAAMayAliasAccessInfo() : - CGM.getTBAAAccessInfo(type); - CGM.DecorateInstructionWithTBAA(load, TBAAInfo); - - mayAlias = false; - type = refType->getPointeeType(); - - CharUnits alignment = - getNaturalTypeAlignment(type, &FieldBaseInfo, /* TBAAInfo= */ nullptr, - /* forPointeeType= */ true); - FieldBaseInfo.setMayAlias(false); - addr = Address(load, alignment); + CGM.DecorateInstructionWithTBAA(load, FieldTBAAInfo); + + FieldType = refType->getPointeeType(); + CharUnits Align = getNaturalTypeAlignment(FieldType, &FieldBaseInfo, + &FieldTBAAInfo, + /* forPointeeType= */ true); + addr = Address(load, Align); // Qualifiers on the struct don't apply to the referencee, and // we'll pick up CVR from the actual type later, so reset these @@ -3747,46 +3781,15 @@ // for both unions and structs. A union needs a bitcast, a struct element // will need a bitcast if the LLVM type laid out doesn't match the desired // type. - addr = Builder.CreateElementBitCast(addr, - CGM.getTypes().ConvertTypeForMem(type), - field->getName()); + addr = Builder.CreateElementBitCast( + addr, CGM.getTypes().ConvertTypeForMem(FieldType), field->getName()); if (field->hasAttr()) addr = EmitFieldAnnotations(field, addr); - LValue LV = MakeAddrLValue(addr, type, FieldBaseInfo, - CGM.getTBAAAccessInfo(type)); + LValue LV = MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); LV.getQuals().addCVRQualifiers(cvr); - // Fields of may_alias structs act like 'char' for TBAA purposes. - // FIXME: this should get propagated down through anonymous structs - // and unions. - if (mayAlias) { - LV.setTBAAInfo(CGM.getTBAAMayAliasAccessInfo()); - } else if (TBAAPath) { - // If no base type been assigned for the base access, then try to generate - // one for this base lvalue. 
- TBAAAccessInfo TBAAInfo = base.getTBAAInfo(); - if (!TBAAInfo.BaseType) { - TBAAInfo.BaseType = CGM.getTBAABaseTypeInfo(base.getType()); - assert(!TBAAInfo.Offset && - "Nonzero offset for an access with no base type!"); - } - - // Adjust offset to be relative to the base type. - const ASTRecordLayout &Layout = - getContext().getASTRecordLayout(field->getParent()); - unsigned CharWidth = getContext().getCharWidth(); - if (TBAAInfo.BaseType) - TBAAInfo.Offset += - Layout.getFieldOffset(field->getFieldIndex()) / CharWidth; - - // Update the final access type. - TBAAInfo.AccessType = LV.getTBAAInfo().AccessType; - - LV.setTBAAInfo(TBAAInfo); - } - // __weak attribute on a field is ignored. if (LV.getQuals().getObjCGCAttr() == Qualifiers::Weak) LV.getQuals().removeObjCGCAttr(); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGObjCRuntime.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGObjCRuntime.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGObjCRuntime.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGObjCRuntime.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -110,7 +110,8 @@ llvm::Type::getIntNTy(CGF.getLLVMContext(), Info->StorageSize)); return LValue::MakeBitfield(Addr, *Info, IvarTy, - LValueBaseInfo(AlignmentSource::Decl, false)); + LValueBaseInfo(AlignmentSource::Decl, false), + TBAAAccessInfo()); } namespace { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -2253,7 +2253,7 @@ ArgType = CGM.getContext().getPointerType(PointeeTy); QC.addRestrict(); enum { NVPTX_local_addr = 5 }; - 
QC.addAddressSpace(NVPTX_local_addr); + QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr)); ArgType = QC.apply(CGM.getContext(), ArgType); return ImplicitParamDecl::Create( CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(), @@ -2273,7 +2273,7 @@ const Type *NonQualTy = QC.strip(NativeParamType); QualType NativePointeeTy = cast(NonQualTy)->getPointeeType(); unsigned NativePointeeAddrSpace = - NativePointeeTy.getQualifiers().getAddressSpace(); + CGF.getContext().getTargetAddressSpace(NativePointeeTy); QualType TargetTy = TargetParam->getType(); llvm::Value *TargetAddr = CGF.EmitLoadOfScalar( LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation()); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -403,6 +403,27 @@ } return; } + + // Check if current Field is better as a single field run. When current field + // has legal integer width, and its bitfield offset is naturally aligned, it + // is better to make the bitfield a separate storage component so as it can be + // accessed directly with lower cost. + auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) { + if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses) + return false; + unsigned Width = Field->getBitWidthValue(Context); + if (!DataLayout.isLegalInteger(Width)) + return false; + // Make sure Field is natually aligned if it is treated as an IType integer. + if (getFieldBitOffset(*Field) % + Context.toBits(getAlignment(getIntNType(Width))) != + 0) + return false; + return true; + }; + + // The start field is better as a single field run. 
+ bool StartFieldAsSingleRun = false; for (;;) { // Check to see if we need to start a new run. if (Run == FieldEnd) { @@ -414,17 +435,28 @@ Run = Field; StartBitOffset = getFieldBitOffset(*Field); Tail = StartBitOffset + Field->getBitWidthValue(Context); + StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run); } ++Field; continue; } - // Add bitfields to the run as long as they qualify. - if (Field != FieldEnd && Field->getBitWidthValue(Context) != 0 && + + // If the start field of a new run is better as a single run, or + // if current field is better as a single run, or + // if current field has zero width bitfield, or + // if the offset of current field is inconsistent with the offset of + // previous field plus its offset, + // skip the block below and go ahead to emit the storage. + // Otherwise, try to add bitfields to the run. + if (!StartFieldAsSingleRun && Field != FieldEnd && + !IsBetterAsSingleFieldRun(Field) && + Field->getBitWidthValue(Context) != 0 && Tail == getFieldBitOffset(*Field)) { Tail += Field->getBitWidthValue(Context); ++Field; continue; } + // We've hit a break-point in the run and need to emit a storage field. 
llvm::Type *Type = getIntNType(Tail - StartBitOffset); // Add the storage member to the record and set the bitfield info for all of @@ -435,6 +467,7 @@ Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset), MemberInfo::Field, nullptr, *Run)); Run = FieldEnd; + StartFieldAsSingleRun = false; } } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGValue.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGValue.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CGValue.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CGValue.h 2017-10-17 14:41:55.000000000 +0000 @@ -230,9 +230,8 @@ Expr *BaseIvarExp; private: - void Initialize(QualType Type, Qualifiers Quals, - CharUnits Alignment, LValueBaseInfo BaseInfo, - TBAAAccessInfo TBAAInfo = TBAAAccessInfo()) { + void Initialize(QualType Type, Qualifiers Quals, CharUnits Alignment, + LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo) { assert((!Alignment.isZero() || Type->isIncompleteType()) && "initializing l-value with zero alignment!"); this->Type = Type; @@ -314,7 +313,7 @@ const Qualifiers &getQuals() const { return Quals; } Qualifiers &getQuals() { return Quals; } - unsigned getAddressSpace() const { return Quals.getAddressSpace(); } + LangAS getAddressSpace() const { return Quals.getAddressSpace(); } CharUnits getAlignment() const { return CharUnits::fromQuantity(Alignment); } void setAlignment(CharUnits A) { Alignment = A.getQuantity(); } @@ -381,24 +380,26 @@ } static LValue MakeVectorElt(Address vecAddress, llvm::Value *Idx, - QualType type, LValueBaseInfo BaseInfo) { + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = VectorElt; R.V = vecAddress.getPointer(); R.VectorIdx = Idx; R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), - BaseInfo); + BaseInfo, TBAAInfo); return R; } static LValue MakeExtVectorElt(Address vecAddress, llvm::Constant *Elts, - QualType type, 
LValueBaseInfo BaseInfo) { + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = ExtVectorElt; R.V = vecAddress.getPointer(); R.VectorElts = Elts; R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), - BaseInfo); + BaseInfo, TBAAInfo); return R; } @@ -408,15 +409,15 @@ /// bit-field refers to. /// \param Info - The information describing how to perform the bit-field /// access. - static LValue MakeBitfield(Address Addr, - const CGBitFieldInfo &Info, - QualType type, - LValueBaseInfo BaseInfo) { + static LValue MakeBitfield(Address Addr, const CGBitFieldInfo &Info, + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = BitField; R.V = Addr.getPointer(); R.BitFieldInfo = &Info; - R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo); + R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo, + TBAAInfo); return R; } @@ -425,7 +426,8 @@ R.LVType = GlobalReg; R.V = Reg.getPointer(); R.Initialize(type, type.getQualifiers(), Reg.getAlignment(), - LValueBaseInfo(AlignmentSource::Decl, false)); + LValueBaseInfo(AlignmentSource::Decl, false), + TBAAAccessInfo()); return R; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenFunction.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenFunction.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenFunction.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenFunction.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -118,9 +118,9 @@ } CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T, - LValueBaseInfo *BaseInfo) { - return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, - /* TBAAInfo= */ nullptr, + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { + return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo, /* forPointeeType= */ true); } @@ -525,8 +525,8 @@ // 
for example in clGetKernelArgInfo() implementation between the address // spaces with targets without unique mapping to the OpenCL address spaces // (basically all single AS CPUs). -static unsigned ArgInfoAddressSpace(unsigned LangAS) { - switch (LangAS) { +static unsigned ArgInfoAddressSpace(LangAS AS) { + switch (AS) { case LangAS::opencl_global: return 1; case LangAS::opencl_constant: return 2; case LangAS::opencl_local: return 3; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenFunction.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenFunction.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenFunction.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenFunction.h 2017-10-17 14:41:55.000000000 +0000 @@ -1949,7 +1949,8 @@ TBAAAccessInfo *TBAAInfo = nullptr, bool forPointeeType = false); CharUnits getNaturalPointeeTypeAlignment(QualType T, - LValueBaseInfo *BaseInfo = nullptr); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); Address EmitLoadOfReference(Address Ref, const ReferenceType *RefTy, LValueBaseInfo *BaseInfo = nullptr, @@ -3091,13 +3092,6 @@ llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, LValueBaseInfo BaseInfo, - bool isNontemporal = false) { - return EmitLoadOfScalar(Addr, Volatile, Ty, Loc, BaseInfo, - CGM.getTBAAAccessInfo(Ty), isNontemporal); - } - - llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, - SourceLocation Loc, LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo, bool isNontemporal = false); @@ -3119,13 +3113,6 @@ } void EmitStoreOfScalar(llvm::Value *Value, Address Addr, - bool Volatile, QualType Ty, LValueBaseInfo BaseInfo, - bool isInit = false, bool isNontemporal = false) { - EmitStoreOfScalar(Value, Addr, Volatile, Ty, BaseInfo, - CGM.getTBAAAccessInfo(Ty), isInit, isNontemporal); - } - - void EmitStoreOfScalar(llvm::Value *Value, 
Address Addr, bool Volatile, QualType Ty, LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo, bool isInit = false, bool isNontemporal = false); @@ -3198,7 +3185,8 @@ RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc); Address EmitArrayToPointerDecay(const Expr *Array, - LValueBaseInfo *BaseInfo = nullptr); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); class ConstantEmission { llvm::PointerIntPair ValueAndIsReference; @@ -3920,7 +3908,8 @@ /// reasonable to just ignore the returned alignment when it isn't from an /// explicit source. Address EmitPointerWithAlignment(const Expr *Addr, - LValueBaseInfo *BaseInfo = nullptr); + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenModule.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenModule.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenModule.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenModule.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -612,6 +612,13 @@ return TBAA->getMayAliasAccessInfo(); } +TBAAAccessInfo CodeGenModule::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo) { + if (!TBAA) + return TBAAAccessInfo(); + return TBAA->mergeTBAAInfoForCast(SourceInfo, TargetInfo); +} + void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo) { if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo)) @@ -2493,10 +2500,9 @@ } } - auto ExpectedAS = + LangAS ExpectedAS = D ? D->getType().getAddressSpace() - : static_cast(LangOpts.OpenCL ? LangAS::opencl_global - : LangAS::Default); + : (LangOpts.OpenCL ? 
LangAS::opencl_global : LangAS::Default); assert(getContext().getTargetAddressSpace(ExpectedAS) == Ty->getPointerAddressSpace()); if (AddrSpace != ExpectedAS) @@ -2635,11 +2641,10 @@ getDataLayout().getTypeStoreSizeInBits(Ty)); } -unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { - unsigned AddrSpace; +LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { + LangAS AddrSpace = LangAS::Default; if (LangOpts.OpenCL) { - AddrSpace = D ? D->getType().getAddressSpace() - : static_cast(LangAS::opencl_global); + AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global; assert(AddrSpace == LangAS::opencl_global || AddrSpace == LangAS::opencl_constant || AddrSpace == LangAS::opencl_local || @@ -3799,7 +3804,7 @@ !EvalResult.hasSideEffects()) Value = &EvalResult.Val; - unsigned AddrSpace = + LangAS AddrSpace = VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace(); Optional emitter; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenModule.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenModule.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenModule.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenModule.h 2017-10-17 14:41:55.000000000 +0000 @@ -677,6 +677,11 @@ /// may-alias accesses. TBAAAccessInfo getTBAAMayAliasAccessInfo(); + /// mergeTBAAInfoForCast - Get merged TBAA information for the purposes of + /// type casts. + TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo); + bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor); bool isPaddedAtomicType(QualType type); @@ -735,7 +740,7 @@ /// /// For languages without explicit address spaces, if D has default address /// space, target-specific global or constant address space may be returned. 
- unsigned GetGlobalVarAddressSpace(const VarDecl *D); + LangAS GetGlobalVarAddressSpace(const VarDecl *D); /// Return the llvm::Constant for the address of the given global variable. /// If Ty is non-null and if the global doesn't exist, then it will be created diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenTBAA.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenTBAA.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenTBAA.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenTBAA.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -309,3 +309,11 @@ TBAAAccessInfo CodeGenTBAA::getMayAliasAccessInfo() { return TBAAAccessInfo(getChar()); } + +TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo) { + TBAAAccessInfo MayAliasInfo = getMayAliasAccessInfo(); + if (SourceInfo == MayAliasInfo || TargetInfo == MayAliasInfo) + return MayAliasInfo; + return TargetInfo; +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenTBAA.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenTBAA.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenTBAA.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenTBAA.h 2017-10-17 14:41:55.000000000 +0000 @@ -47,6 +47,12 @@ : TBAAAccessInfo(/* AccessType= */ nullptr) {} + bool operator==(const TBAAAccessInfo &Other) const { + return BaseType == Other.BaseType && + AccessType == Other.AccessType && + Offset == Other.Offset; + } + /// BaseType - The base/leading access type. May be null if this access /// descriptor represents an access that is not considered to be an access /// to an aggregate or union member. @@ -136,6 +142,11 @@ /// getMayAliasAccessInfo - Get TBAA information that represents may-alias /// accesses. 
TBAAAccessInfo getMayAliasAccessInfo(); + + /// mergeTBAAInfoForCast - Get merged TBAA information for the purpose of + /// type casts. + TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo); }; } // end namespace CodeGen diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenTypeCache.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenTypeCache.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CodeGenTypeCache.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CodeGenTypeCache.h 2017-10-17 14:41:55.000000000 +0000 @@ -15,6 +15,7 @@ #define LLVM_CLANG_LIB_CODEGEN_CODEGENTYPECACHE_H #include "clang/AST/CharUnits.h" +#include "clang/Basic/AddressSpaces.h" #include "llvm/IR/CallingConv.h" namespace llvm { @@ -94,7 +95,7 @@ unsigned char SizeAlignInBytes; }; - unsigned ASTAllocaAddressSpace; + LangAS ASTAllocaAddressSpace; CharUnits getSizeSize() const { return CharUnits::fromQuantity(SizeSizeInBytes); @@ -114,7 +115,7 @@ llvm::CallingConv::ID BuiltinCC; llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; } - unsigned getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; } + LangAS getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; } }; } // end namespace CodeGen diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/ConstantEmitter.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/ConstantEmitter.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/ConstantEmitter.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/ConstantEmitter.h 2017-10-17 14:41:55.000000000 +0000 @@ -40,7 +40,7 @@ /// The AST address space where this (non-abstract) initializer is going. /// Used for generating appropriate placeholders. 
- unsigned DestAddressSpace; + LangAS DestAddressSpace; llvm::SmallVector, 4> PlaceholderAddresses; @@ -68,11 +68,9 @@ /// Try to emit the initiaizer of the given declaration as an abstract /// constant. If this succeeds, the emission must be finalized. llvm::Constant *tryEmitForInitializer(const VarDecl &D); - llvm::Constant *tryEmitForInitializer(const Expr *E, - unsigned destAddrSpace, + llvm::Constant *tryEmitForInitializer(const Expr *E, LangAS destAddrSpace, QualType destType); - llvm::Constant *emitForInitializer(const APValue &value, - unsigned destAddrSpace, + llvm::Constant *emitForInitializer(const APValue &value, LangAS destAddrSpace, QualType destType); void finalize(llvm::GlobalVariable *global); @@ -151,7 +149,7 @@ llvm::GlobalValue *placeholder); private: - void initializeNonAbstract(unsigned destAS) { + void initializeNonAbstract(LangAS destAS) { assert(!InitializedNonAbstract); InitializedNonAbstract = true; DestAddressSpace = destAS; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CoverageMappingGen.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CoverageMappingGen.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/CoverageMappingGen.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/CoverageMappingGen.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -758,6 +758,22 @@ handleFileExit(getEnd(S)); } + /// Determine whether the final deferred region emitted in \p Body should be + /// discarded. 
+ static bool discardFinalDeferredRegionInDecl(Stmt *Body) { + if (auto *CS = dyn_cast(Body)) { + Stmt *LastStmt = CS->body_back(); + if (auto *IfElse = dyn_cast(LastStmt)) { + if (auto *Else = dyn_cast_or_null(IfElse->getElse())) + LastStmt = Else->body_back(); + else + LastStmt = IfElse->getElse(); + } + return dyn_cast_or_null(LastStmt); + } + return false; + } + void VisitDecl(const Decl *D) { assert(!DeferredRegion && "Deferred region never completed"); @@ -770,14 +786,14 @@ Counter ExitCount = propagateCounts(getRegionCounter(Body), Body); assert(RegionStack.empty() && "Regions entered but never exited"); - // Special case: if the last statement is a return, throw away the - // deferred region. This allows the closing brace to have a count. - if (auto *CS = dyn_cast_or_null(Body)) - if (dyn_cast_or_null(CS->body_back())) + if (DeferredRegion) { + // Complete (or discard) any deferred regions introduced by the last + // statement. + if (discardFinalDeferredRegionInDecl(Body)) DeferredRegion = None; - - // Complete any deferred regions introduced by the last statement. 
- popRegions(completeDeferred(ExitCount, getEnd(Body))); + else + popRegions(completeDeferred(ExitCount, getEnd(Body))); + } } void VisitReturnStmt(const ReturnStmt *S) { @@ -1083,16 +1099,18 @@ } void VisitBinLAnd(const BinaryOperator *E) { - extendRegion(E); - Visit(E->getLHS()); + extendRegion(E->getLHS()); + propagateCounts(getRegion().getCounter(), E->getLHS()); + handleFileExit(getEnd(E->getLHS())); extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); } void VisitBinLOr(const BinaryOperator *E) { - extendRegion(E); - Visit(E->getLHS()); + extendRegion(E->getLHS()); + propagateCounts(getRegion().getCounter(), E->getLHS()); + handleFileExit(getEnd(E->getLHS())); extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/TargetInfo.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/TargetInfo.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/TargetInfo.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/TargetInfo.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -423,18 +423,17 @@ return llvm::ConstantPointerNull::get(T); } -unsigned TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const { +LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { assert(!CGM.getLangOpts().OpenCL && !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && "Address space agnostic languages only"); - return D ? D->getType().getAddressSpace() - : static_cast(LangAS::Default); + return D ? 
D->getType().getAddressSpace() : LangAS::Default; } llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( - CodeGen::CodeGenFunction &CGF, llvm::Value *Src, unsigned SrcAddr, - unsigned DestAddr, llvm::Type *DestTy, bool isNonNull) const { + CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr, + LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. if (auto *C = dyn_cast(Src)) @@ -444,7 +443,7 @@ llvm::Constant * TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, - unsigned SrcAddr, unsigned DestAddr, + LangAS SrcAddr, LangAS DestAddr, llvm::Type *DestTy) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. @@ -7611,12 +7610,12 @@ llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; - unsigned getASTAllocaAddressSpace() const override { - return LangAS::FirstTargetAddressSpace + - getABIInfo().getDataLayout().getAllocaAddrSpace(); + LangAS getASTAllocaAddressSpace() const override { + return getLangASFromTargetAS( + getABIInfo().getDataLayout().getAllocaAddrSpace()); } - unsigned getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const override; + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const override; llvm::Function * @@ -7707,21 +7706,19 @@ llvm::ConstantPointerNull::get(NPT), PT); } -unsigned +LangAS AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { assert(!CGM.getLangOpts().OpenCL && !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && "Address space agnostic languages only"); - unsigned DefaultGlobalAS = - 
LangAS::FirstTargetAddressSpace + - CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + LangAS DefaultGlobalAS = getLangASFromTargetAS( + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global)); if (!D) return DefaultGlobalAS; - unsigned AddrSpace = D->getType().getAddressSpace(); - assert(AddrSpace == LangAS::Default || - AddrSpace >= LangAS::FirstTargetAddressSpace); + LangAS AddrSpace = D->getType().getAddressSpace(); + assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); if (AddrSpace != LangAS::Default) return AddrSpace; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/TargetInfo.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/TargetInfo.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/CodeGen/TargetInfo.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/CodeGen/TargetInfo.h 2017-10-17 14:41:55.000000000 +0000 @@ -236,11 +236,11 @@ /// other than OpenCL and CUDA. /// If \p D is nullptr, returns the default target favored address space /// for global variable. - virtual unsigned getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const; + virtual LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const; /// Get the AST address space for alloca. - virtual unsigned getASTAllocaAddressSpace() const { return LangAS::Default; } + virtual LangAS getASTAllocaAddressSpace() const { return LangAS::Default; } /// Perform address space cast of an expression of pointer type. /// \param V is the LLVM value to be casted to another address space. @@ -249,9 +249,8 @@ /// \param DestTy is the destination LLVM pointer type. /// \param IsNonNull is the flag indicating \p V is known to be non null. 
virtual llvm::Value *performAddrSpaceCast(CodeGen::CodeGenFunction &CGF, - llvm::Value *V, unsigned SrcAddr, - unsigned DestAddr, - llvm::Type *DestTy, + llvm::Value *V, LangAS SrcAddr, + LangAS DestAddr, llvm::Type *DestTy, bool IsNonNull = false) const; /// Perform address space cast of a constant expression of pointer type. @@ -259,9 +258,10 @@ /// \param SrcAddr is the language address space of \p V. /// \param DestAddr is the targeted language address space. /// \param DestTy is the destination LLVM pointer type. - virtual llvm::Constant * - performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr, - unsigned DestAddr, llvm::Type *DestTy) const; + virtual llvm::Constant *performAddrSpaceCast(CodeGenModule &CGM, + llvm::Constant *V, + LangAS SrcAddr, LangAS DestAddr, + llvm::Type *DestTy) const; /// Get the syncscope used in LLVM IR. virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/SanitizerArgs.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/SanitizerArgs.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/SanitizerArgs.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/SanitizerArgs.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -171,19 +171,23 @@ } bool SanitizerArgs::needsUbsanRt() const { - return ((Sanitizers.Mask & NeedsUbsanRt & ~TrapSanitizers.Mask) || - CoverageFeatures) && - !Sanitizers.has(Address) && !Sanitizers.has(Memory) && - !Sanitizers.has(Thread) && !Sanitizers.has(DataFlow) && - !Sanitizers.has(Leak) && !CfiCrossDso; + // All of these include ubsan. 
+ if (needsAsanRt() || needsMsanRt() || needsTsanRt() || needsDfsanRt() || + needsLsanRt() || needsCfiDiagRt()) + return false; + + return (Sanitizers.Mask & NeedsUbsanRt & ~TrapSanitizers.Mask) || + CoverageFeatures; } bool SanitizerArgs::needsCfiRt() const { - return !(Sanitizers.Mask & CFI & ~TrapSanitizers.Mask) && CfiCrossDso; + return !(Sanitizers.Mask & CFI & ~TrapSanitizers.Mask) && CfiCrossDso && + !ImplicitCfiRuntime; } bool SanitizerArgs::needsCfiDiagRt() const { - return (Sanitizers.Mask & CFI & ~TrapSanitizers.Mask) && CfiCrossDso; + return (Sanitizers.Mask & CFI & ~TrapSanitizers.Mask) && CfiCrossDso && + !ImplicitCfiRuntime; } bool SanitizerArgs::requiresPIE() const { @@ -615,6 +619,8 @@ TC.getTriple().isAndroid() || TC.getTriple().isOSFuchsia() || TC.getTriple().isOSDarwin()); + ImplicitCfiRuntime = TC.getTriple().isAndroid(); + if (AllAddedKinds & Address) { NeedPIE |= TC.getTriple().isAndroid() || TC.getTriple().isOSFuchsia(); if (Arg *A = diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/ToolChain.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/ToolChain.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/ToolChain.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/ToolChain.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -390,7 +390,11 @@ // then use whatever the default system linker is. 
return GetProgramPath(getDefaultLinker()); } else { - llvm::SmallString<8> LinkerName("ld."); + llvm::SmallString<8> LinkerName; + if (Triple.isOSDarwin()) + LinkerName.append("ld64."); + else + LinkerName.append("ld."); LinkerName.append(UseLinker); std::string LinkerPath(GetProgramPath(LinkerName.c_str())); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/ToolChains/Clang.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/ToolChains/Clang.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/ToolChains/Clang.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/ToolChains/Clang.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -3365,6 +3365,9 @@ options::OPT_fno_optimize_sibling_calls)) CmdArgs.push_back("-mdisable-tail-calls"); + Args.AddLastArg(CmdArgs, options::OPT_ffine_grained_bitfield_accesses, + options::OPT_fno_fine_grained_bitfield_accesses); + // Handle segmented stacks. if (Args.hasArg(options::OPT_fsplit_stack)) CmdArgs.push_back("-split-stacks"); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/ToolChains/Cuda.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/ToolChains/Cuda.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/ToolChains/Cuda.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/ToolChains/Cuda.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -87,8 +87,7 @@ LibDevicePath = InstallPath + "/nvvm/libdevice"; auto &FS = D.getVFS(); - if (!(FS.exists(IncludePath) && FS.exists(BinPath) && - FS.exists(LibDevicePath))) + if (!(FS.exists(IncludePath) && FS.exists(BinPath))) continue; // On Linux, we have both lib and lib64 directories, and we need to choose @@ -167,17 +166,9 @@ } } - // This code prevents IsValid from being set when - // no libdevice has been found. 
- bool allEmpty = true; - std::string LibDeviceFile; - for (auto key : LibDeviceMap.keys()) { - LibDeviceFile = LibDeviceMap.lookup(key); - if (!LibDeviceFile.empty()) - allEmpty = false; - } - - if (allEmpty) + // Check that we have found at least one libdevice that we can link in if + // -nocudalib hasn't been specified. + if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib)) continue; IsValid = true; @@ -551,9 +542,9 @@ // flags are not duplicated. // Also append the compute capability. if (DeviceOffloadKind == Action::OFK_OpenMP) { - for (Arg *A : Args){ + for (Arg *A : Args) { bool IsDuplicate = false; - for (Arg *DALArg : *DAL){ + for (Arg *DALArg : *DAL) { if (A == DALArg) { IsDuplicate = true; break; @@ -564,14 +555,9 @@ } StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ); - if (Arch.empty()) { - // Default compute capability for CUDA toolchain is the - // lowest compute capability supported by the installed - // CUDA version. - DAL->AddJoinedArg(nullptr, - Opts.getOption(options::OPT_march_EQ), - CudaInstallation.getLowestExistingArch()); - } + if (Arch.empty()) + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), + CLANG_OPENMP_NVPTX_DEFAULT_ARCH); return DAL; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/ToolChains/Cuda.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/ToolChains/Cuda.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Driver/ToolChains/Cuda.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Driver/ToolChains/Cuda.h 2017-10-17 14:41:55.000000000 +0000 @@ -76,17 +76,6 @@ std::string getLibDeviceFile(StringRef Gpu) const { return LibDeviceMap.lookup(Gpu); } - /// \brief Get lowest available compute capability - /// for which a libdevice library exists. 
- std::string getLowestExistingArch() const { - std::string LibDeviceFile; - for (auto key : LibDeviceMap.keys()) { - LibDeviceFile = LibDeviceMap.lookup(key); - if (!LibDeviceFile.empty()) - return key; - } - return "sm_20"; - } }; namespace tools { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Format/BreakableToken.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Format/BreakableToken.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Format/BreakableToken.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Format/BreakableToken.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -599,6 +599,12 @@ } } +bool BreakableBlockComment::introducesBreakBefore(unsigned LineIndex) const { + // A break is introduced when we want delimiters on newline. + return LineIndex == 0 && DelimitersOnNewline && + Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos; +} + void BreakableBlockComment::replaceWhitespaceBefore( unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, Split SplitBefore, WhitespaceManager &Whitespaces) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Format/BreakableToken.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Format/BreakableToken.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Format/BreakableToken.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Format/BreakableToken.h 2017-10-17 14:41:55.000000000 +0000 @@ -58,6 +58,8 @@ /// operations that might be executed before the main line breaking occurs: /// - getSplitBefore, for finding a split such that the content preceding it /// needs to be specially reflown, +/// - introducesBreakBefore, for checking if reformatting the beginning +/// of the content introduces a line break before it, /// - getLineLengthAfterSplitBefore, for calculating the line length in columns /// of the remainder of the content after the beginning of the content has /// been reformatted, and @@ 
-135,6 +137,12 @@ return Split(StringRef::npos, 0); } + /// \brief Returns if a break before the content at \p LineIndex will be + /// inserted after the whitespace preceding the content has been reformatted. + virtual bool introducesBreakBefore(unsigned LineIndex) const { + return false; + } + /// \brief Returns the number of columns required to format the piece of line /// at \p LineIndex after the content preceding the whitespace range specified /// \p SplitBefore has been reformatted, but before any breaks are made to @@ -339,6 +347,7 @@ Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, llvm::Regex &CommentPragmasRegex) const override; + bool introducesBreakBefore(unsigned LineIndex) const override; unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn, diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Format/ContinuationIndenter.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Format/ContinuationIndenter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Format/ContinuationIndenter.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Format/ContinuationIndenter.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -106,6 +106,7 @@ /*AvoidBinPacking=*/false, /*NoLineBreak=*/false)); State.LineContainsContinuedForLoopSection = false; + State.NoContinuation = false; State.StartOfStringLiteral = 0; State.StartOfLineLevel = 0; State.LowestLevelOnLine = 0; @@ -322,6 +323,12 @@ Previous.TokenText == "\'\\n\'")))) return true; + if (Previous.is(TT_BlockComment) && Previous.IsMultiline) + return true; + + if (State.NoContinuation) + return true; + return false; } @@ -331,6 +338,8 @@ const FormatToken &Current = *State.NextToken; assert(!State.Stack.empty()); + State.NoContinuation = false; + if ((Current.is(TT_ImplicitStringLiteral) && (Current.Previous->Tok.getIdentifierInfo() == nullptr || 
Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == @@ -1286,7 +1295,7 @@ return 0; } } else if (Current.is(TT_BlockComment)) { - if (!Current.isTrailingComment() || !Style.ReflowComments || + if (!Style.ReflowComments || // If a comment token switches formatting, like // /* clang-format on */, we don't want to break it further, // but we may still want to adjust its indentation. @@ -1332,6 +1341,7 @@ ReflowInProgress = SplitBefore.first != StringRef::npos; TailOffset = ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0; + BreakInserted = BreakInserted || Token->introducesBreakBefore(LineIndex); if (!DryRun) Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, RemainingSpace, SplitBefore, Whitespaces); @@ -1408,6 +1418,9 @@ State.Stack[i].BreakBeforeParameter = true; } + if (Current.is(TT_BlockComment)) + State.NoContinuation = true; + Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString : Style.PenaltyBreakComment; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Format/ContinuationIndenter.h llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Format/ContinuationIndenter.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Format/ContinuationIndenter.h 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Format/ContinuationIndenter.h 2017-10-17 14:41:55.000000000 +0000 @@ -318,6 +318,9 @@ /// \brief \c true if this line contains a continued for-loop section. bool LineContainsContinuedForLoopSection; + /// \brief \c true if \p NextToken should not continue this line. + bool NoContinuation; + /// \brief The \c NestingLevel at the start of this line. 
unsigned StartOfLineLevel; @@ -364,6 +367,8 @@ if (LineContainsContinuedForLoopSection != Other.LineContainsContinuedForLoopSection) return LineContainsContinuedForLoopSection; + if (NoContinuation != Other.NoContinuation) + return NoContinuation; if (StartOfLineLevel != Other.StartOfLineLevel) return StartOfLineLevel < Other.StartOfLineLevel; if (LowestLevelOnLine != Other.LowestLevelOnLine) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Frontend/CompilerInvocation.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Frontend/CompilerInvocation.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Frontend/CompilerInvocation.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Frontend/CompilerInvocation.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -546,6 +546,9 @@ OPT_fuse_register_sized_bitfield_access); Opts.RelaxedAliasing = Args.hasArg(OPT_relaxed_aliasing); Opts.StructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa); + Opts.FineGrainedBitfieldAccesses = + Args.hasFlag(OPT_ffine_grained_bitfield_accesses, + OPT_fno_fine_grained_bitfield_accesses, false); Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags); Opts.MergeAllConstants = !Args.hasArg(OPT_fno_merge_all_constants); Opts.NoCommon = Args.hasArg(OPT_fno_common); @@ -2763,6 +2766,13 @@ if (Arch == llvm::Triple::spir || Arch == llvm::Triple::spir64) { Res.getDiagnosticOpts().Warnings.push_back("spir-compat"); } + + // If sanitizer is enabled, disable OPT_ffine_grained_bitfield_accesses. 
+ if (Res.getCodeGenOpts().FineGrainedBitfieldAccesses && + !Res.getLangOpts()->Sanitize.empty()) { + Res.getCodeGenOpts().FineGrainedBitfieldAccesses = false; + Diags.Report(diag::warn_drv_fine_grained_bitfield_accesses_ignored); + } return Success; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Frontend/PrintPreprocessedOutput.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Frontend/PrintPreprocessedOutput.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Frontend/PrintPreprocessedOutput.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Frontend/PrintPreprocessedOutput.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -720,6 +720,12 @@ // -traditional-cpp the lexer keeps /all/ whitespace, including comments. SourceLocation StartLoc = Tok.getLocation(); Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength())); + } else if (Tok.is(tok::eod)) { + // Don't print end of directive tokens, since they are typically newlines + // that mess up our line tracking. These come from unknown pre-processor + // directives or hash-prefixed comments in standalone assembly files. + PP.Lex(Tok); + continue; } else if (Tok.is(tok::annot_module_include)) { // PrintPPOutputPPCallbacks::InclusionDirective handles producing // appropriate output here. Ignore this token entirely. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaChecking.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaChecking.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaChecking.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaChecking.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -3140,7 +3140,7 @@ // Treat this argument as _Nonnull as we want to show a warning if // NULL is passed into it. CheckNonNullArgument(*this, ValArg, DRE->getLocStart()); - unsigned AS = 0; + LangAS AS = LangAS::Default; // Keep address space of non-atomic pointer type. 
if (const PointerType *PtrTy = ValArg->getType()->getAs()) { @@ -8556,19 +8556,71 @@ void AnalyzeImplicitConversions(Sema &S, Expr *E, SourceLocation CC); -bool IsZero(Sema &S, Expr *E) { +bool IsEnumConstOrFromMacro(Sema &S, Expr *E) { // Suppress cases where we are comparing against an enum constant. if (const DeclRefExpr *DR = dyn_cast(E->IgnoreParenImpCasts())) if (isa(DR->getDecl())) - return false; + return true; // Suppress cases where the '0' value is expanded from a macro. if (E->getLocStart().isMacroID()) - return false; + return true; - llvm::APSInt Value; - return E->isIntegerConstantExpr(Value, S.Context) && Value == 0; + return false; +} + +bool isNonBooleanIntegerValue(Expr *E) { + return !E->isKnownToHaveBooleanValue() && E->getType()->isIntegerType(); +} + +bool isNonBooleanUnsignedValue(Expr *E) { + // We are checking that the expression is not known to have boolean value, + // is an integer type; and is either unsigned after implicit casts, + // or was unsigned before implicit casts. + return isNonBooleanIntegerValue(E) && + (!E->getType()->isSignedIntegerType() || + !E->IgnoreParenImpCasts()->getType()->isSignedIntegerType()); +} + +enum class LimitType { + Max, // e.g. 32767 for short + Min // e.g. -32768 for short +}; + +/// Checks whether Expr 'Constant' may be the +/// std::numeric_limits<>::max() or std::numeric_limits<>::min() +/// of the Expr 'Other'. If true, then returns the limit type (min or max). +/// The Value is the evaluation of Constant +llvm::Optional IsTypeLimit(Sema &S, Expr *Constant, Expr *Other, + const llvm::APSInt &Value) { + if (IsEnumConstOrFromMacro(S, Constant)) + return llvm::Optional(); + + if (isNonBooleanUnsignedValue(Other) && Value == 0) + return LimitType::Min; + + // TODO: Investigate using GetExprRange() to get tighter bounds + // on the bit ranges. 
+ QualType OtherT = Other->IgnoreParenImpCasts()->getType(); + if (const auto *AT = OtherT->getAs()) + OtherT = AT->getValueType(); + + IntRange OtherRange = IntRange::forValueOfType(S.Context, OtherT); + + if (llvm::APSInt::isSameValue( + llvm::APSInt::getMaxValue(OtherRange.Width, + OtherT->isUnsignedIntegerType()), + Value)) + return LimitType::Max; + + if (llvm::APSInt::isSameValue( + llvm::APSInt::getMinValue(OtherRange.Width, + OtherT->isUnsignedIntegerType()), + Value)) + return LimitType::Min; + + return llvm::Optional(); } bool HasEnumType(Expr *E) { @@ -8583,63 +8635,60 @@ return E->getType()->isEnumeralType(); } -bool isNonBooleanUnsignedValue(Expr *E) { - // We are checking that the expression is not known to have boolean value, - // is an integer type; and is either unsigned after implicit casts, - // or was unsigned before implicit casts. - return !E->isKnownToHaveBooleanValue() && E->getType()->isIntegerType() && - (!E->getType()->isSignedIntegerType() || - !E->IgnoreParenImpCasts()->getType()->isSignedIntegerType()); -} - -bool CheckTautologicalComparisonWithZero(Sema &S, BinaryOperator *E) { - // Disable warning in template instantiations. - if (S.inTemplateInstantiation()) +bool CheckTautologicalComparison(Sema &S, BinaryOperator *E, Expr *Constant, + Expr *Other, const llvm::APSInt &Value, + bool RhsConstant) { + // Disable warning in template instantiations + // and only analyze <, >, <= and >= operations. + if (S.inTemplateInstantiation() || !E->isRelationalOp()) return false; - // bool values are handled by DiagnoseOutOfRangeComparison(). - BinaryOperatorKind Op = E->getOpcode(); - if (E->isValueDependent()) + + QualType OType = Other->IgnoreParenImpCasts()->getType(); + + llvm::Optional ValueType; // Which limit (min/max) is the constant? 
+ + if (!(isNonBooleanIntegerValue(Other) && + (ValueType = IsTypeLimit(S, Constant, Other, Value)))) return false; - Expr *LHS = E->getLHS(); - Expr *RHS = E->getRHS(); + bool ConstIsLowerBound = (Op == BO_LT || Op == BO_LE) ^ RhsConstant; + bool ResultWhenConstEqualsOther = (Op == BO_LE || Op == BO_GE); + bool ResultWhenConstNeOther = + ConstIsLowerBound ^ (ValueType == LimitType::Max); + if (ResultWhenConstEqualsOther != ResultWhenConstNeOther) + return false; // The comparison is not tautological. + + const bool Result = ResultWhenConstEqualsOther; + + unsigned Diag = (isNonBooleanUnsignedValue(Other) && Value == 0) + ? (HasEnumType(Other) + ? diag::warn_unsigned_enum_always_true_comparison + : diag::warn_unsigned_always_true_comparison) + : diag::warn_tautological_constant_compare; - bool Match = true; + // Should be enough for uint128 (39 decimal digits) + SmallString<64> PrettySourceValue; + llvm::raw_svector_ostream OS(PrettySourceValue); + OS << Value; - if (Op == BO_LT && isNonBooleanUnsignedValue(LHS) && IsZero(S, RHS)) { - S.Diag(E->getOperatorLoc(), - HasEnumType(LHS) ? diag::warn_lunsigned_enum_always_true_comparison - : diag::warn_lunsigned_always_true_comparison) - << "< 0" << false << LHS->getSourceRange() << RHS->getSourceRange(); - } else if (Op == BO_GE && isNonBooleanUnsignedValue(LHS) && IsZero(S, RHS)) { - S.Diag(E->getOperatorLoc(), - HasEnumType(LHS) ? diag::warn_lunsigned_enum_always_true_comparison - : diag::warn_lunsigned_always_true_comparison) - << ">= 0" << true << LHS->getSourceRange() << RHS->getSourceRange(); - } else if (Op == BO_GT && isNonBooleanUnsignedValue(RHS) && IsZero(S, LHS)) { - S.Diag(E->getOperatorLoc(), - HasEnumType(RHS) ? diag::warn_runsigned_enum_always_true_comparison - : diag::warn_runsigned_always_true_comparison) - << "0 >" << false << LHS->getSourceRange() << RHS->getSourceRange(); - } else if (Op == BO_LE && isNonBooleanUnsignedValue(RHS) && IsZero(S, LHS)) { - S.Diag(E->getOperatorLoc(), - HasEnumType(RHS) ? 
diag::warn_runsigned_enum_always_true_comparison - : diag::warn_runsigned_always_true_comparison) - << "0 <=" << true << LHS->getSourceRange() << RHS->getSourceRange(); - } else - Match = false; + S.Diag(E->getOperatorLoc(), Diag) + << RhsConstant << OType << E->getOpcodeStr() << OS.str() << Result + << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); - return Match; + return true; } -void DiagnoseOutOfRangeComparison(Sema &S, BinaryOperator *E, Expr *Constant, +bool DiagnoseOutOfRangeComparison(Sema &S, BinaryOperator *E, Expr *Constant, Expr *Other, const llvm::APSInt &Value, bool RhsConstant) { // Disable warning in template instantiations. if (S.inTemplateInstantiation()) - return; + return false; + + Constant = Constant->IgnoreParenImpCasts(); + Other = Other->IgnoreParenImpCasts(); // TODO: Investigate using GetExprRange() to get tighter bounds // on the bit ranges. @@ -8651,10 +8700,6 @@ bool OtherIsBooleanType = Other->isKnownToHaveBooleanValue(); - // 0 values are handled later by CheckTautologicalComparisonWithZero(). - if ((Value == 0) && (!OtherIsBooleanType)) - return; - BinaryOperatorKind op = E->getOpcode(); bool IsTrue = true; @@ -8670,7 +8715,7 @@ QualType CommonT = E->getLHS()->getType(); if (S.Context.hasSameUnqualifiedType(OtherT, ConstantT)) - return; + return false; assert((OtherT->isIntegerType() && ConstantT->isIntegerType()) && "comparison with non-integer type"); @@ -8685,38 +8730,38 @@ // Check that the constant is representable in type OtherT. if (ConstantSigned) { if (OtherWidth >= Value.getMinSignedBits()) - return; + return false; } else { // !ConstantSigned if (OtherWidth >= Value.getActiveBits() + 1) - return; + return false; } } else { // !OtherSigned // Check that the constant is representable in type OtherT. // Negative values are out of range. 
if (ConstantSigned) { if (Value.isNonNegative() && OtherWidth >= Value.getActiveBits()) - return; + return false; } else { // !ConstantSigned if (OtherWidth >= Value.getActiveBits()) - return; + return false; } } } else { // !CommonSigned if (OtherRange.NonNegative) { if (OtherWidth >= Value.getActiveBits()) - return; + return false; } else { // OtherSigned assert(!ConstantSigned && "Two signed types converted to unsigned types."); // Check to see if the constant is representable in OtherT. if (OtherWidth > Value.getActiveBits()) - return; + return false; // Check to see if the constant is equivalent to a negative value // cast to CommonT. if (S.Context.getIntWidth(ConstantT) == S.Context.getIntWidth(CommonT) && Value.isNegative() && Value.getMinSignedBits() <= OtherWidth) - return; + return false; // The constant value rests between values that OtherT can represent // after conversion. Relational comparison still works, but equality // comparisons will be tautological. @@ -8729,7 +8774,7 @@ if (op == BO_EQ || op == BO_NE) { IsTrue = op == BO_NE; } else if (EqualityOnly) { - return; + return false; } else if (RhsConstant) { if (op == BO_GT || op == BO_GE) IsTrue = !PositiveConstant; @@ -8817,7 +8862,7 @@ } else if (CmpRes == ATrue) { IsTrue = true; } else { - return; + return false; } } @@ -8840,6 +8885,8 @@ << OS.str() << LiteralOrBoolConstant << OtherT << (OtherIsBooleanType && !OtherT->isBooleanType()) << IsTrue << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange()); + + return true; } /// Analyze the operands of the given comparison. Implements the @@ -8865,44 +8912,48 @@ if (E->isValueDependent()) return AnalyzeImpConvsInComparison(S, E); - Expr *LHS = E->getLHS()->IgnoreParenImpCasts(); - Expr *RHS = E->getRHS()->IgnoreParenImpCasts(); - - bool IsComparisonConstant = false; - - // Check whether an integer constant comparison results in a value - // of 'true' or 'false'. 
+ Expr *LHS = E->getLHS(); + Expr *RHS = E->getRHS(); + if (T->isIntegralType(S.Context)) { llvm::APSInt RHSValue; - bool IsRHSIntegralLiteral = - RHS->isIntegerConstantExpr(RHSValue, S.Context); llvm::APSInt LHSValue; - bool IsLHSIntegralLiteral = - LHS->isIntegerConstantExpr(LHSValue, S.Context); - if (IsRHSIntegralLiteral && !IsLHSIntegralLiteral) - DiagnoseOutOfRangeComparison(S, E, RHS, LHS, RHSValue, true); - else if (!IsRHSIntegralLiteral && IsLHSIntegralLiteral) - DiagnoseOutOfRangeComparison(S, E, LHS, RHS, LHSValue, false); - else - IsComparisonConstant = - (IsRHSIntegralLiteral && IsLHSIntegralLiteral); - } else if (!T->hasUnsignedIntegerRepresentation()) - IsComparisonConstant = E->isIntegerConstantExpr(S.Context); - - // We don't care about value-dependent expressions or expressions - // whose result is a constant. - if (IsComparisonConstant) - return AnalyzeImpConvsInComparison(S, E); - // If this is a tautological comparison, suppress -Wsign-compare. - if (CheckTautologicalComparisonWithZero(S, E)) - return AnalyzeImpConvsInComparison(S, E); + bool IsRHSIntegralLiteral = RHS->isIntegerConstantExpr(RHSValue, S.Context); + bool IsLHSIntegralLiteral = LHS->isIntegerConstantExpr(LHSValue, S.Context); + + // We don't care about expressions whose result is a constant. + if (IsRHSIntegralLiteral && IsLHSIntegralLiteral) + return AnalyzeImpConvsInComparison(S, E); + + // We only care about expressions where just one side is literal + if (IsRHSIntegralLiteral ^ IsLHSIntegralLiteral) { + // Is the constant on the RHS or LHS? + const bool RhsConstant = IsRHSIntegralLiteral; + Expr *Const = RhsConstant ? RHS : LHS; + Expr *Other = RhsConstant ? LHS : RHS; + const llvm::APSInt &Value = RhsConstant ? RHSValue : LHSValue; + + // Check whether an integer constant comparison results in a value + // of 'true' or 'false'. 
+ + if (CheckTautologicalComparison(S, E, Const, Other, Value, RhsConstant)) + return AnalyzeImpConvsInComparison(S, E); - // We don't do anything special if this isn't an unsigned integral - // comparison: we're only interested in integral comparisons, and - // signed comparisons only happen in cases we don't care to warn about. - if (!T->hasUnsignedIntegerRepresentation()) + if (DiagnoseOutOfRangeComparison(S, E, Const, Other, Value, RhsConstant)) + return AnalyzeImpConvsInComparison(S, E); + } + } + + if (!T->hasUnsignedIntegerRepresentation()) { + // We don't do anything special if this isn't an unsigned integral + // comparison: we're only interested in integral comparisons, and + // signed comparisons only happen in cases we don't care to warn about. return AnalyzeImpConvsInComparison(S, E); + } + + LHS = LHS->IgnoreParenImpCasts(); + RHS = RHS->IgnoreParenImpCasts(); // Check to see if one of the (unmodified) operands is of different // signedness. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaDeclAttr.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaDeclAttr.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaDeclAttr.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaDeclAttr.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -4385,7 +4385,7 @@ static bool isValidSwiftContextType(QualType type) { if (!type->hasPointerRepresentation()) return type->isDependentType(); - return type->getPointeeType().getAddressSpace() == 0; + return type->getPointeeType().getAddressSpace() == LangAS::Default; } /// Pointers and references in the default address space. @@ -4397,7 +4397,7 @@ } else { return type->isDependentType(); } - return type.getAddressSpace() == 0; + return type.getAddressSpace() == LangAS::Default; } /// Pointers and references to pointers in the default address space. 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaDecl.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaDecl.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaDecl.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaDecl.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -7332,8 +7332,8 @@ // This includes arrays of objects with address space qualifiers, but not // automatic variables that point to other address spaces. // ISO/IEC TR 18037 S5.1.2 - if (!getLangOpts().OpenCL - && NewVD->hasLocalStorage() && T.getAddressSpace() != 0) { + if (!getLangOpts().OpenCL && NewVD->hasLocalStorage() && + T.getAddressSpace() != LangAS::Default) { Diag(NewVD->getLocation(), diag::err_as_qualified_auto_decl) << 0; NewVD->setInvalidDecl(); return; @@ -8833,7 +8833,7 @@ if (getLangOpts().OpenCL) { // OpenCL v1.1 s6.5: Using an address space qualifier in a function return // type declaration will generate a compilation error. - unsigned AddressSpace = NewFD->getReturnType().getAddressSpace(); + LangAS AddressSpace = NewFD->getReturnType().getAddressSpace(); if (AddressSpace != LangAS::Default) { Diag(NewFD->getLocation(), diag::err_opencl_return_value_with_address_space); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaDeclObjC.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaDeclObjC.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaDeclObjC.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaDeclObjC.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -4688,7 +4688,7 @@ // duration shall not be qualified by an address-space qualifier." // Since all parameters have automatic store duration, they can not have // an address space. 
- if (T.getAddressSpace() != 0) { + if (T.getAddressSpace() != LangAS::Default) { Diag(IdLoc, diag::err_arg_with_address_space); Invalid = true; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaExpr.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaExpr.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaExpr.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaExpr.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -5064,7 +5064,7 @@ } NeedsNewDecl = true; - unsigned AS = ArgType->getPointeeType().getQualifiers().getAddressSpace(); + LangAS AS = ArgType->getPointeeType().getAddressSpace(); QualType PointeeType = ParamType->getPointeeType(); PointeeType = Context.getAddrSpaceQualType(PointeeType, AS); @@ -5760,8 +5760,8 @@ case Type::STK_ObjCObjectPointer: switch (DestTy->getScalarTypeKind()) { case Type::STK_CPointer: { - unsigned SrcAS = SrcTy->getPointeeType().getAddressSpace(); - unsigned DestAS = DestTy->getPointeeType().getAddressSpace(); + LangAS SrcAS = SrcTy->getPointeeType().getAddressSpace(); + LangAS DestAS = DestTy->getPointeeType().getAddressSpace(); if (SrcAS != DestAS) return CK_AddressSpaceConversion; return CK_BitCast; @@ -6365,9 +6365,9 @@ Qualifiers lhQual = lhptee.getQualifiers(); Qualifiers rhQual = rhptee.getQualifiers(); - unsigned ResultAddrSpace = 0; - unsigned LAddrSpace = lhQual.getAddressSpace(); - unsigned RAddrSpace = rhQual.getAddressSpace(); + LangAS ResultAddrSpace = LangAS::Default; + LangAS LAddrSpace = lhQual.getAddressSpace(); + LangAS RAddrSpace = rhQual.getAddressSpace(); if (S.getLangOpts().OpenCL) { // OpenCL v1.1 s6.5 - Conversion between pointers to distinct address // spaces is disallowed. 
@@ -7649,8 +7649,8 @@ if (const PointerType *LHSPointer = dyn_cast(LHSType)) { // U* -> T* if (isa(RHSType)) { - unsigned AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); - unsigned AddrSpaceR = RHSType->getPointeeType().getAddressSpace(); + LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); + LangAS AddrSpaceR = RHSType->getPointeeType().getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkPointerTypesForAssignment(*this, LHSType, RHSType); } @@ -7685,10 +7685,10 @@ // U^ -> void* if (RHSType->getAs()) { if (LHSPointer->getPointeeType()->isVoidType()) { - unsigned AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); - unsigned AddrSpaceR = RHSType->getAs() - ->getPointeeType() - .getAddressSpace(); + LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); + LangAS AddrSpaceR = RHSType->getAs() + ->getPointeeType() + .getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return Compatible; @@ -7702,12 +7702,12 @@ if (isa(LHSType)) { // U^ -> T^ if (RHSType->isBlockPointerType()) { - unsigned AddrSpaceL = LHSType->getAs() - ->getPointeeType() - .getAddressSpace(); - unsigned AddrSpaceR = RHSType->getAs() - ->getPointeeType() - .getAddressSpace(); + LangAS AddrSpaceL = LHSType->getAs() + ->getPointeeType() + .getAddressSpace(); + LangAS AddrSpaceR = RHSType->getAs() + ->getPointeeType() + .getAddressSpace(); Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType); } @@ -9804,8 +9804,8 @@ << LHS.get()->getSourceRange() << RHS.get()->getSourceRange(); } } - unsigned AddrSpaceL = LCanPointeeTy.getAddressSpace(); - unsigned AddrSpaceR = RCanPointeeTy.getAddressSpace(); + LangAS AddrSpaceL = LCanPointeeTy.getAddressSpace(); + LangAS AddrSpaceR = RCanPointeeTy.getAddressSpace(); CastKind Kind = AddrSpaceL != AddrSpaceR ? 
CK_AddressSpaceConversion : CK_BitCast; if (LHSIsNull && !RHSIsNull) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaExprCXX.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaExprCXX.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaExprCXX.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaExprCXX.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -2113,7 +2113,7 @@ else if (AllocType->isVariablyModifiedType()) return Diag(Loc, diag::err_variably_modified_new_type) << AllocType; - else if (AllocType.getAddressSpace()) + else if (AllocType.getAddressSpace() != LangAS::Default) return Diag(Loc, diag::err_address_space_qualified_new) << AllocType.getUnqualifiedType() << AllocType.getQualifiers().getAddressSpaceAttributePrintValue(); @@ -3185,7 +3185,7 @@ QualType Pointee = Type->getAs()->getPointeeType(); QualType PointeeElem = Context.getBaseElementType(Pointee); - if (Pointee.getAddressSpace()) + if (Pointee.getAddressSpace() != LangAS::Default) return Diag(Ex.get()->getLocStart(), diag::err_address_space_qualified_delete) << Pointee.getUnqualifiedType() @@ -4824,9 +4824,13 @@ } case BTT_IsSame: return Self.Context.hasSameType(LhsT, RhsT); - case BTT_TypeCompatible: - return Self.Context.typesAreCompatible(LhsT.getUnqualifiedType(), - RhsT.getUnqualifiedType()); + case BTT_TypeCompatible: { + // GCC ignores cv-qualifiers on arrays for this builtin. 
+ Qualifiers LhsQuals, RhsQuals; + QualType Lhs = Self.getASTContext().getUnqualifiedArrayType(LhsT, LhsQuals); + QualType Rhs = Self.getASTContext().getUnqualifiedArrayType(RhsT, RhsQuals); + return Self.Context.typesAreCompatible(Lhs, Rhs); + } case BTT_IsConvertible: case BTT_IsConvertibleTo: { // C++0x [meta.rel]p4: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaTemplateDeduction.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaTemplateDeduction.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaTemplateDeduction.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaTemplateDeduction.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -1870,11 +1870,10 @@ Deduced); } - if (Arg.getAddressSpace() >= LangAS::FirstTargetAddressSpace) { + if (isTargetAddressSpace(Arg.getAddressSpace())) { llvm::APSInt ArgAddressSpace(S.Context.getTypeSize(S.Context.IntTy), false); - ArgAddressSpace = - (Arg.getAddressSpace() - LangAS::FirstTargetAddressSpace); + ArgAddressSpace = toTargetAddressSpace(Arg.getAddressSpace()); // Perform deduction on the pointer types. if (Sema::TemplateDeductionResult Result = diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaType.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaType.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Sema/SemaType.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Sema/SemaType.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -5631,7 +5631,7 @@ // If this type is already address space qualified, reject it. // ISO/IEC TR 18037 S5.3 (amending C99 6.7.3): "No type shall be qualified // by qualifiers for two or more different address spaces." 
- if (T.getAddressSpace()) { + if (T.getAddressSpace() != LangAS::Default) { Diag(AttrLoc, diag::err_attribute_address_multiple_qualifiers); return QualType(); } @@ -5655,15 +5655,16 @@ } llvm::APSInt max(addrSpace.getBitWidth()); - max = Qualifiers::MaxAddressSpace - LangAS::FirstTargetAddressSpace; + max = + Qualifiers::MaxAddressSpace - (unsigned)LangAS::FirstTargetAddressSpace; if (addrSpace > max) { Diag(AttrLoc, diag::err_attribute_address_space_too_high) << (unsigned)max.getZExtValue() << AddrSpace->getSourceRange(); return QualType(); } - unsigned ASIdx = static_cast(addrSpace.getZExtValue()) + - LangAS::FirstTargetAddressSpace; + LangAS ASIdx = + getLangASFromTargetAS(static_cast(addrSpace.getZExtValue())); return Context.getAddrSpaceQualType(T, ASIdx); } @@ -5689,7 +5690,7 @@ // If this type is already address space qualified, reject it. // ISO/IEC TR 18037 S5.3 (amending C99 6.7.3): "No type shall be qualified by // qualifiers for two or more different address spaces." - if (Type.getAddressSpace()) { + if (Type.getAddressSpace() != LangAS::Default) { S.Diag(Attr.getLoc(), diag::err_attribute_address_multiple_qualifiers); Attr.setInvalid(); return; @@ -5703,7 +5704,7 @@ return; } - unsigned ASIdx; + LangAS ASIdx; if (Attr.getKind() == AttributeList::AT_AddressSpace) { // Check the attribute arguments. @@ -7036,7 +7037,7 @@ (T->isVoidType() && !IsPointee)) return; - unsigned ImpAddr; + LangAS ImpAddr; // Put OpenCL automatic variable in private address space. 
// OpenCL v1.2 s6.5: // The default address space name for arguments to a function in a diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -23,6 +23,7 @@ #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Refactoring.h" #include "clang/Tooling/Refactoring/RefactoringAction.h" +#include "clang/Tooling/Refactoring/RefactoringDiagnostic.h" #include "clang/Tooling/Refactoring/RefactoringOptions.h" #include "clang/Tooling/Refactoring/Rename/SymbolName.h" #include "clang/Tooling/Refactoring/Rename/USRFinder.h" @@ -49,11 +50,9 @@ return Selection.takeError(); const NamedDecl *ND = getNamedDeclAt(Context.getASTContext(), Selection->getBegin()); - if (!ND) { - // FIXME: Use a diagnostic. 
- return llvm::make_error("no symbol selected", - llvm::inconvertibleErrorCode()); - } + if (!ND) + return Context.createDiagnosticError( + Selection->getBegin(), diag::err_refactor_selection_no_symbol); return getCanonicalSymbolDeclaration(ND); } }; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -194,9 +194,48 @@ bool VisitDeclRefExpr(const DeclRefExpr *Expr) { const NamedDecl *Decl = Expr->getFoundDecl(); + // Get the underlying declaration of the shadow declaration introduced by a + // using declaration. + if (auto *UsingShadow = llvm::dyn_cast(Decl)) { + Decl = UsingShadow->getTargetDecl(); + } + + auto BeginLoc = Expr->getLocStart(); + auto EndLoc = Expr->getLocEnd(); + // In case of renaming an enum declaration, we have to explicitly handle + // unscoped enum constants referenced in expressions (e.g. + // "auto r = ns1::ns2::Green" where Green is an enum constant of an unscoped + // enum decl "ns1::ns2::Color") as these enum constants cannot be caught by + // TypeLoc. + if (const auto *T = llvm::dyn_cast(Decl)) { + // FIXME: Handle the enum constant without prefix qualifiers (`a = Green`) + // when renaming an unscoped enum declaration with a new namespace. + if (!Expr->hasQualifier()) + return true; + + if (const auto *ED = + llvm::dyn_cast_or_null(getClosestAncestorDecl(*T))) { + if (ED->isScoped()) + return true; + Decl = ED; + } + // The current fix would qualify "ns1::ns2::Green" as + // "ns1::ns2::Color::Green". + // + // Get the EndLoc of the replacement by moving 1 character backward ( + // to exclude the last '::'). 
+ // + // ns1::ns2::Green; + // ^ ^^ + // BeginLoc |EndLoc of the qualifier + // new EndLoc + EndLoc = Expr->getQualifierLoc().getEndLoc().getLocWithOffset(-1); + assert(EndLoc.isValid() && + "The enum constant should have prefix qualifers."); + } if (isInUSRSet(Decl)) { - RenameInfo Info = {Expr->getSourceRange().getBegin(), - Expr->getSourceRange().getEnd(), + RenameInfo Info = {BeginLoc, + EndLoc, Decl, getClosestAncestorDecl(*Expr), Expr->getQualifier(), @@ -358,10 +397,13 @@ // Get the supported declaration from a given typeLoc. If the declaration type // is not supported, returns nullptr. // - // FIXME: support more types, e.g. enum, type alias. + // FIXME: support more types, e.g. type alias. const NamedDecl *getSupportedDeclFromTypeLoc(TypeLoc Loc) { if (const auto *RD = Loc.getType()->getAsCXXRecordDecl()) return RD; + if (const auto *ED = + llvm::dyn_cast_or_null(Loc.getType()->getAsTagDecl())) + return ED; return nullptr; } @@ -452,6 +494,23 @@ RenameInfo.FromDecl, NewName.startswith("::") ? NewName.str() : ("::" + NewName).str()); + } else { + // This fixes the case where type `T` is a parameter inside a function + // type (e.g. `std::function`) and the DeclContext of `T` + // becomes the translation unit. As a workaround, we simply use + // fully-qualified name here for all references whose `DeclContext` is + // the translation unit and ignore the possible existence of + // using-decls (in the global scope) that can shorten the replaced + // name. + llvm::StringRef ActualName = Lexer::getSourceText( + CharSourceRange::getTokenRange( + SourceRange(RenameInfo.Begin, RenameInfo.End)), + SM, TranslationUnitDecl->getASTContext().getLangOpts()); + // Add the leading "::" back if the name written in the code contains + // it. + if (ActualName.startswith("::") && !NewName.startswith("::")) { + ReplacedName = "::" + NewName.str(); + } } } // If the NewName contains leading "::", add it back. 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Analysis/conversion.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Analysis/conversion.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Analysis/conversion.c 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Analysis/conversion.c 2017-10-17 14:41:55.000000000 +0000 @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -Wno-conversion -analyzer-checker=core,alpha.core.Conversion -verify %s +// RUN: %clang_analyze_cc1 -Wno-conversion -Wno-tautological-constant-compare -analyzer-checker=core,alpha.core.Conversion -verify %s unsigned char U8; signed char S8; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Analysis/null-deref-ps.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Analysis/null-deref-ps.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Analysis/null-deref-ps.c 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Analysis/null-deref-ps.c 2017-10-17 14:41:55.000000000 +0000 @@ -1,5 +1,5 @@ -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -analyzer-checker=core,deadcode,alpha.core -std=gnu99 -analyzer-store=region -analyzer-purge=none -verify %s -Wno-error=return-type -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -analyzer-checker=core,deadcode,alpha.core -std=gnu99 -analyzer-store=region -verify %s -Wno-error=return-type +// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -Wno-tautological-constant-compare -Wtautological-unsigned-zero-compare -analyzer-checker=core,deadcode,alpha.core -std=gnu99 -analyzer-store=region -analyzer-purge=none -verify %s -Wno-error=return-type +// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -Wno-tautological-constant-compare -Wtautological-unsigned-zero-compare -analyzer-checker=core,deadcode,alpha.core -std=gnu99 -analyzer-store=region -verify %s -Wno-error=return-type typedef unsigned uintptr_t; diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/clang/test/CodeGen/libcall-declarations.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/CodeGen/libcall-declarations.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/CodeGen/libcall-declarations.c 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/CodeGen/libcall-declarations.c 2017-10-17 14:41:55.000000000 +0000 @@ -330,9 +330,9 @@ // CHECK-NOERRNO: declare double @ldexp(double, i32) [[NUWRN]] // CHECK-NOERRNO: declare float @ldexpf(float, i32) [[NUWRN]] // CHECK-NOERRNO: declare x86_fp80 @ldexpl(x86_fp80, i32) [[NUWRN]] -// CHECK-NOERRNO: declare double @nan(i8*) [[NUW:#[0-9]+]] -// CHECK-NOERRNO: declare float @nanf(i8*) [[NUW]] -// CHECK-NOERRNO: declare x86_fp80 @nanl(i8*) [[NUW]] +// CHECK-NOERRNO: declare double @nan(i8*) [[NUWRO:#[0-9]+]] +// CHECK-NOERRNO: declare float @nanf(i8*) [[NUWRO]] +// CHECK-NOERRNO: declare x86_fp80 @nanl(i8*) [[NUWRO]] // CHECK-NOERRNO: declare double @pow(double, double) [[NUWRN]] // CHECK-NOERRNO: declare float @powf(float, float) [[NUWRN]] // CHECK-NOERRNO: declare x86_fp80 @powl(x86_fp80, x86_fp80) [[NUWRN]] @@ -539,9 +539,9 @@ // CHECK-ERRNO: declare double @fabs(double) [[NUWRN]] // CHECK-ERRNO: declare float @fabsf(float) [[NUWRN]] // CHECK-ERRNO: declare x86_fp80 @fabsl(x86_fp80) [[NUWRN]] -// CHECK-ERRNO: declare double @nan(i8*) [[NUW:#[0-9]+]] -// CHECK-ERRNO: declare float @nanf(i8*) [[NUW]] -// CHECK-ERRNO: declare x86_fp80 @nanl(i8*) [[NUW]] +// CHECK-ERRNO: declare double @nan(i8*) [[NUWRO:#[0-9]+]] +// CHECK-ERRNO: declare float @nanf(i8*) [[NUWRO]] +// CHECK-ERRNO: declare x86_fp80 @nanl(i8*) [[NUWRO]] // CHECK-ERRNO: declare double @ceil(double) [[NUWRN]] // CHECK-ERRNO: declare float @ceilf(float) [[NUWRN]] // CHECK-ERRNO: declare x86_fp80 @ceill(x86_fp80) [[NUWRN]] @@ -615,11 +615,7 @@ // CHECK-ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NUWRN]] // CHECK-NOERRNO: attributes [[NUWRN]] = { nounwind readnone{{.*}} } -// 
CHECK-NOERRNO: attributes [[NUW]] = { nounwind -// CHECK-NOERRNO-NOT: readnone -// CHECK-NOERRNO-SAME: {{.*}} } +// CHECK-NOERRNO: attributes [[NUWRO]] = { nounwind readonly{{.*}} } -// CHECK-ERRNO: attributes [[NUW]] = { nounwind -// CHECK-ERRNO-NOT: readnone -// CHECK-ERRNO-SAME: {{.*}} } // CHECK-ERRNO: attributes [[NUWRN]] = { nounwind readnone{{.*}} } +// CHECK-ERRNO: attributes [[NUWRO]] = { nounwind readonly{{.*}} } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/CodeGen/tbaa-cast.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/test/CodeGen/tbaa-cast.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/CodeGen/tbaa-cast.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/CodeGen/tbaa-cast.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \ +// RUN: -emit-llvm -o - | FileCheck %s +// +// Check that we generate correct TBAA information for lvalues constructed +// with use of casts. 
+ +struct V { + unsigned n; +}; + +struct S { + char bytes[4]; +}; + +void foo(S *p) { +// CHECK-LABEL: _Z3fooP1S +// CHECK: store i32 5, {{.*}}, !tbaa [[TAG_V_n:!.*]] + ((V*)p->bytes)->n = 5; +} + +// CHECK-DAG: [[TAG_V_n]] = !{[[TYPE_V:!.*]], [[TYPE_int:!.*]], i64 0} +// CHECK-DAG: [[TYPE_V]] = !{!"_ZTS1V", !{{.*}}, i64 0} +// CHECK-DAG: [[TYPE_int]] = !{!"int", !{{.*}}, i64 0} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/CodeGenCXX/finegrain-bitfield-access.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/test/CodeGenCXX/finegrain-bitfield-access.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/CodeGenCXX/finegrain-bitfield-access.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/CodeGenCXX/finegrain-bitfield-access.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -0,0 +1,162 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffine-grained-bitfield-accesses \ +// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffine-grained-bitfield-accesses \ +// RUN: -emit-llvm -fsanitize=address -o - %s | FileCheck %s --check-prefix=SANITIZE +// Check -fsplit-bitfields will be ignored since sanitizer is enabled. 
+ +struct S1 { + unsigned f1:2; + unsigned f2:6; + unsigned f3:8; + unsigned f4:4; + unsigned f5:8; +}; + +S1 a1; +unsigned read8_1() { + // CHECK-LABEL: @_Z7read8_1v + // CHECK: %bf.load = load i8, i8* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 1), align 1 + // CHECK-NEXT: %bf.cast = zext i8 %bf.load to i32 + // CHECK-NEXT: ret i32 %bf.cast + // SANITIZE-LABEL: @_Z7read8_1v + // SANITIZE: %bf.load = load i32, i32* getelementptr inbounds {{.*}}, align 4 + // SANITIZE: %bf.lshr = lshr i32 %bf.load, 8 + // SANITIZE: %bf.clear = and i32 %bf.lshr, 255 + // SANITIZE: ret i32 %bf.clear + return a1.f3; +} +void write8_1() { + // CHECK-LABEL: @_Z8write8_1v + // CHECK: store i8 3, i8* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 1), align 1 + // CHECK-NEXT: ret void + // SANITIZE-LABEL: @_Z8write8_1v + // SANITIZE: %bf.load = load i32, i32* getelementptr inbounds {{.*}}, align 4 + // SANITIZE-NEXT: %bf.clear = and i32 %bf.load, -65281 + // SANITIZE-NEXT: %bf.set = or i32 %bf.clear, 768 + // SANITIZE-NEXT: store i32 %bf.set, i32* getelementptr inbounds {{.*}}, align 4 + // SANITIZE-NEXT: ret void + a1.f3 = 3; +} + +unsigned read8_2() { + // CHECK-LABEL: @_Z7read8_2v + // CHECK: %bf.load = load i16, i16* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 2), align 2 + // CHECK-NEXT: %bf.lshr = lshr i16 %bf.load, 4 + // CHECK-NEXT: %bf.clear = and i16 %bf.lshr, 255 + // CHECK-NEXT: %bf.cast = zext i16 %bf.clear to i32 + // CHECK-NEXT: ret i32 %bf.cast + // SANITIZE-LABEL: @_Z7read8_2v + // SANITIZE: %bf.load = load i32, i32* getelementptr inbounds {{.*}}, align 4 + // SANITIZE-NEXT: %bf.lshr = lshr i32 %bf.load, 20 + // SANITIZE-NEXT: %bf.clear = and i32 %bf.lshr, 255 + // SANITIZE-NEXT: ret i32 %bf.clear + return a1.f5; +} +void write8_2() { + // CHECK-LABEL: @_Z8write8_2v + // CHECK: %bf.load = load i16, i16* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 2), align 2 + // CHECK-NEXT: %bf.clear = and i16 
%bf.load, -4081 + // CHECK-NEXT: %bf.set = or i16 %bf.clear, 48 + // CHECK-NEXT: store i16 %bf.set, i16* getelementptr inbounds (%struct.S1, %struct.S1* @a1, i32 0, i32 2), align 2 + // CHECK-NEXT: ret void + // SANITIZE-LABEL: @_Z8write8_2v + // SANITIZE: %bf.load = load i32, i32* getelementptr inbounds {{.*}}, align 4 + // SANITIZE-NEXT: %bf.clear = and i32 %bf.load, -267386881 + // SANITIZE-NEXT: %bf.set = or i32 %bf.clear, 3145728 + // SANITIZE-NEXT: store i32 %bf.set, i32* getelementptr inbounds {{.*}}, align 4 + // SANITIZE-NEXT: ret void + a1.f5 = 3; +} + +struct S2 { + unsigned long f1:16; + unsigned long f2:16; + unsigned long f3:6; +}; + +S2 a2; +unsigned read16_1() { + // CHECK-LABEL: @_Z8read16_1v + // CHECK: %bf.load = load i16, i16* getelementptr inbounds (%struct.S2, %struct.S2* @a2, i32 0, i32 0), align 8 + // CHECK-NEXT: %bf.cast = zext i16 %bf.load to i64 + // CHECK-NEXT: %conv = trunc i64 %bf.cast to i32 + // CHECK-NEXT: ret i32 %conv + // SANITIZE-LABEL: @_Z8read16_1v + // SANITIZE: %bf.load = load i64, i64* bitcast {{.*}}, align 8 + // SANITIZE-NEXT: %bf.clear = and i64 %bf.load, 65535 + // SANITIZE-NEXT: %conv = trunc i64 %bf.clear to i32 + // SANITIZE-NEXT: ret i32 %conv + return a2.f1; +} +unsigned read16_2() { + // CHECK-LABEL: @_Z8read16_2v + // CHECK: %bf.load = load i16, i16* getelementptr inbounds (%struct.S2, %struct.S2* @a2, i32 0, i32 1), align 2 + // CHECK-NEXT: %bf.cast = zext i16 %bf.load to i64 + // CHECK-NEXT: %conv = trunc i64 %bf.cast to i32 + // CHECK-NEXT: ret i32 %conv + // SANITIZE-LABEL: @_Z8read16_2v + // SANITIZE: %bf.load = load i64, i64* bitcast {{.*}}, align 8 + // SANITIZE-NEXT: %bf.lshr = lshr i64 %bf.load, 16 + // SANITIZE-NEXT: %bf.clear = and i64 %bf.lshr, 65535 + // SANITIZE-NEXT: %conv = trunc i64 %bf.clear to i32 + // SANITIZE-NEXT: ret i32 %conv + return a2.f2; +} + +void write16_1() { + // CHECK-LABEL: @_Z9write16_1v + // CHECK: store i16 5, i16* getelementptr inbounds (%struct.S2, %struct.S2* @a2, i32 0, 
i32 0), align 8 + // CHECK-NEXT: ret void + // SANITIZE-LABEL: @_Z9write16_1v + // SANITIZE: %bf.load = load i64, i64* bitcast {{.*}}, align 8 + // SANITIZE-NEXT: %bf.clear = and i64 %bf.load, -65536 + // SANITIZE-NEXT: %bf.set = or i64 %bf.clear, 5 + // SANITIZE-NEXT: store i64 %bf.set, i64* bitcast {{.*}}, align 8 + // SANITIZE-NEXT: ret void + a2.f1 = 5; +} +void write16_2() { + // CHECK-LABEL: @_Z9write16_2v + // CHECK: store i16 5, i16* getelementptr inbounds (%struct.S2, %struct.S2* @a2, i32 0, i32 1), align 2 + // CHECK-NEXT: ret void + // SANITIZE-LABEL: @_Z9write16_2v + // SANITIZE: %bf.load = load i64, i64* bitcast {{.*}}, align 8 + // SANITIZE-NEXT: %bf.clear = and i64 %bf.load, -4294901761 + // SANITIZE-NEXT: %bf.set = or i64 %bf.clear, 327680 + // SANITIZE-NEXT: store i64 %bf.set, i64* bitcast {{.*}}, align 8 + // SANITIZE-NEXT: ret void + a2.f2 = 5; +} + +struct S3 { + unsigned long f1:14; + unsigned long f2:18; + unsigned long f3:32; +}; + +S3 a3; +unsigned read32_1() { + // CHECK-LABEL: @_Z8read32_1v + // CHECK: %bf.load = load i32, i32* getelementptr inbounds (%struct.S3, %struct.S3* @a3, i32 0, i32 1), align 4 + // CHECK-NEXT: %bf.cast = zext i32 %bf.load to i64 + // CHECK-NEXT: %conv = trunc i64 %bf.cast to i32 + // CHECK-NEXT: ret i32 %conv + // SANITIZE-LABEL: @_Z8read32_1v + // SANITIZE: %bf.load = load i64, i64* getelementptr inbounds {{.*}}, align 8 + // SANITIZE-NEXT: %bf.lshr = lshr i64 %bf.load, 32 + // SANITIZE-NEXT: %conv = trunc i64 %bf.lshr to i32 + // SANITIZE-NEXT: ret i32 %conv + return a3.f3; +} +void write32_1() { + // CHECK-LABEL: @_Z9write32_1v + // CHECK: store i32 5, i32* getelementptr inbounds (%struct.S3, %struct.S3* @a3, i32 0, i32 1), align 4 + // CHECK-NEXT: ret void + // SANITIZE-LABEL: @_Z9write32_1v + // SANITIZE: %bf.load = load i64, i64* getelementptr inbounds {{.*}}, align 8 + // SANITIZE-NEXT: %bf.clear = and i64 %bf.load, 4294967295 + // SANITIZE-NEXT: %bf.set = or i64 %bf.clear, 21474836480 + // SANITIZE-NEXT: 
store i64 %bf.set, i64* getelementptr inbounds {{.*}}, align 8 + // SANITIZE-NEXT: ret void + a3.f3 = 5; +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/CodeGenOpenCL/atomic-ops.cl llvm-toolchain-snapshot-6.0~svn316003/clang/test/CodeGenOpenCL/atomic-ops.cl --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/CodeGenOpenCL/atomic-ops.cl 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/CodeGenOpenCL/atomic-ops.cl 2017-10-17 14:41:55.000000000 +0000 @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | opt -instnamer -S | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s // Also test serialization of atomic operations here, to avoid duplicating the test. -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl -// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | opt -instnamer -S | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl +// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s #ifndef ALREADY_INCLUDED #define ALREADY_INCLUDED @@ -32,22 +32,22 @@ void fi1(atomic_int *i) { // CHECK-LABEL: @fi1 - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); - // CHECK: load atomic i32, i32 addrspace(4)* 
%{{[.0-9A-Z_a-z]+}} seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); - // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); } void fi2(atomic_int *i) { // CHECK-LABEL: @fi2 - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); } @@ -56,7 +56,7 @@ // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group); // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst @@ -65,25 +65,25 @@ void fi3(atomic_int *i, atomic_uint *ui) { // CHECK-LABEL: @fi3 - // CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw min i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 
%{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw max i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw umin i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group); - // CHECK: atomicrmw umax i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group); } bool fi4(atomic_int *i) { // CHECK-LABEL: @fi4( - // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire + // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] @@ -100,16 +100,16 @@ // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]] // CHECK-NEXT: ] // CHECK: [[opencl_workgroup]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} 
syncscope("workgroup") seq_cst // CHECK: br label %[[continue:.*]] // CHECK: [[opencl_device]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst // CHECK: br label %[[continue]] // CHECK: [[opencl_allsvmdevices]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst + // CHECK: load atomic i32, i32* %{{.*}} seq_cst // CHECK: br label %[[continue]] // CHECK: [[opencl_subgroup]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst // CHECK: br label %[[continue]] // CHECK: [[continue]]: int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); @@ -141,29 +141,29 @@ // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] // CHECK-NEXT: ] // CHECK: [[MON_WG]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic // CHECK: [[MON_DEV]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic // CHECK: [[MON_ALL]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} monotonic + // CHECK: load atomic i32, i32* %{{.*}} monotonic // CHECK: [[MON_SUB]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") monotonic + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic // CHECK: [[ACQ_WG]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire // CHECK: [[ACQ_DEV]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire // CHECK: [[ACQ_ALL]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} acquire + // CHECK: load 
atomic i32, i32* %{{.*}} acquire // CHECK: [[ACQ_SUB]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") acquire + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire // CHECK: [[SEQ_WG]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst // CHECK: [[SEQ_DEV]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst // CHECK: [[SEQ_ALL]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst + // CHECK: load atomic i32, i32* %{{.*}} seq_cst // CHECK: [[SEQ_SUB]]: - // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst int x = __opencl_atomic_load(i, order, scope); } @@ -181,7 +181,7 @@ float ff3(atomic_float *d) { // CHECK-LABEL: @ff3 - // CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} syncscope("workgroup") seq_cst + // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group); } @@ -198,10 +198,10 @@ // CHECK-LABEL: @failureOrder void failureOrder(atomic_int *ptr, int *ptr2) { - // CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic + // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); - // CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire + // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 
{{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group); } @@ -279,11 +279,11 @@ // CHECK-LABEL: @test_volatile // CHECK: %[[i_addr:.*]] = alloca i32 // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32 - // CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]] - // CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]] - // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] syncscope("workgroup") seq_cst - // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]] - // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]] + // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]] + // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]] + // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst + // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]] + // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]] // CHECK-NEXT: ret i32 %[[retval]] return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/CoverageMapping/deferred-region.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/test/CoverageMapping/deferred-region.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/CoverageMapping/deferred-region.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/CoverageMapping/deferred-region.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -31,11 +31,28 @@ // CHECK-LABEL: _Z3mazv: void maz() { if (true) - return; // CHECK: Gap,File 0, [[@LINE]]:11 -> 36:3 = (#0 - #1) + return; // CHECK: Gap,File 0, [[@LINE]]:11 -> [[@LINE+2]]:3 = (#0 - #1) return; // CHECK-NOT: Gap } +// CHECK-LABEL: _Z4maazv: +void maaz() { + if (true) + return; // CHECK: Gap,File 0, [[@LINE]]:11 + else + 
return; // CHECK-NOT: Gap,File 0, [[@LINE]] +} + +// CHECK-LABEL: _Z5maaazv: +void maaaz() { + if (true) { + return; + } else { // CHECK: Gap,File 0, [[@LINE]]:4 -> [[@LINE]]:10 + return; // CHECK-NOT: Gap,File 0, [[@LINE]] + } +} + // CHECK-LABEL: _Z3bari: void bar(int x) { IF (x) @@ -158,6 +175,9 @@ foo(1); fooo(0); fooo(1); + maz(); + maaz(); + maaaz(); baz(); bar(0); bar(1); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/CoverageMapping/logical.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/test/CoverageMapping/logical.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/CoverageMapping/logical.cpp 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/CoverageMapping/logical.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -1,13 +1,18 @@ // RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only -main-file-name logical.cpp %s | FileCheck %s -int main() { // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+10]]:2 = #0 +int main() { // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+15]]:2 = #0 bool bt = true; bool bf = false; - bool a = bt && bf; // CHECK-NEXT: File 0, [[@LINE]]:18 -> [[@LINE]]:20 = #1 - a = bt && + bool a = bt && bf; // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE]]:14 = #0 + // CHECK-NEXT: File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:20 = #1 + + a = bt && // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 bf; // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #2 - a = bf || bt; // CHECK-NEXT: File 0, [[@LINE]]:13 -> [[@LINE]]:15 = #3 - a = bf || + + a = bf || bt; // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 + // CHECK-NEXT: File 0, [[@LINE-1]]:13 -> [[@LINE-1]]:15 = #3 + + a = bf || // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 bt; // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #4 return 0; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/CoverageMapping/macro-expansion.c 
llvm-toolchain-snapshot-6.0~svn316003/clang/test/CoverageMapping/macro-expansion.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/CoverageMapping/macro-expansion.c 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/CoverageMapping/macro-expansion.c 2017-10-17 14:41:55.000000000 +0000 @@ -23,10 +23,12 @@ // CHECK-NEXT: Expansion,File 4, [[@LINE+2]]:20 -> [[@LINE+2]]:22 = (#0 + #8) // CHECK-NEXT: File 4, [[@LINE+1]]:36 -> [[@LINE+1]]:37 = (#0 + #8) #define M3(x) do { M2(x); } while (0) -// CHECK-NEXT: File 5, [[@LINE+2]]:15 -> [[@LINE+2]]:27 = #0 +// CHECK-NEXT: File 5, [[@LINE+3]]:15 -> [[@LINE+3]]:27 = #0 +// CHECK-NEXT: File 5, [[@LINE+2]]:16 -> [[@LINE+2]]:19 = #0 // CHECK-NEXT: File 5, [[@LINE+1]]:23 -> [[@LINE+1]]:26 = #12 #define M4(x) ((x) && (x)) -// CHECK-NEXT: File 6, [[@LINE+2]]:15 -> [[@LINE+2]]:27 = #0 +// CHECK-NEXT: File 6, [[@LINE+3]]:15 -> [[@LINE+3]]:27 = #0 +// CHECK-NEXT: File 6, [[@LINE+2]]:16 -> [[@LINE+2]]:19 = #0 // CHECK-NEXT: File 6, [[@LINE+1]]:23 -> [[@LINE+1]]:26 = #14 #define M5(x) ((x) || (x)) // CHECK-NEXT: File 7, [[@LINE+1]]:15 -> [[@LINE+1]]:26 = #0 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Driver/cl-options.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Driver/cl-options.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Driver/cl-options.c 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Driver/cl-options.c 2017-10-17 14:41:55.000000000 +0000 @@ -521,7 +521,7 @@ // STDCXX14: -std=c++14 // RUN: %clang_cl -fmsc-version=1900 -TP -std:c++17 -### -- %s 2>&1 | FileCheck -check-prefix=STDCXX17 %s -// STDCXX14: -std=c++17 +// STDCXX17: -std=c++17 // RUN: %clang_cl -fmsc-version=1900 -TP -std:c++latest -### -- %s 2>&1 | FileCheck -check-prefix=STDCXXLATEST %s // STDCXXLATEST: -std=c++2a diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Driver/cuda-detect.cu 
llvm-toolchain-snapshot-6.0~svn316003/clang/test/Driver/cuda-detect.cu --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Driver/cuda-detect.cu 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Driver/cuda-detect.cu 2017-10-17 14:41:55.000000000 +0000 @@ -2,7 +2,7 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target // -// # Check that we properly detect CUDA installation. +// Check that we properly detect CUDA installation. // RUN: %clang -v --target=i386-unknown-linux \ // RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA // RUN: %clang -v --target=i386-apple-macosx \ @@ -18,6 +18,19 @@ // RUN: %clang -v --target=i386-apple-macosx \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s +// Check that we don't find a CUDA installation without libdevice ... +// RUN: %clang -v --target=i386-unknown-linux \ +// RUN: --sysroot=%S/Inputs/CUDA-nolibdevice 2>&1 | FileCheck %s -check-prefix NOCUDA +// RUN: %clang -v --target=i386-apple-macosx \ +// RUN: --sysroot=%S/Inputs/CUDA-nolibdevice 2>&1 | FileCheck %s -check-prefix NOCUDA + +// ... unless the user doesn't need libdevice +// RUN: %clang -v --target=i386-unknown-linux -nocudalib \ +// RUN: --sysroot=%S/Inputs/CUDA-nolibdevice 2>&1 | FileCheck %s -check-prefix NO-LIBDEVICE +// RUN: %clang -v --target=i386-apple-macosx -nocudalib \ +// RUN: --sysroot=%S/Inputs/CUDA-nolibdevice 2>&1 | FileCheck %s -check-prefix NO-LIBDEVICE + + // Make sure we map libdevice bitcode files to proper GPUs. These // tests use Inputs/CUDA_80 which has full set of libdevice files. // However, libdevice mapping only matches CUDA-7.x at the moment. 
@@ -112,6 +125,7 @@ // RUN: | FileCheck %s --check-prefix CHECK-CXXINCLUDE // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda +// NO-LIBDEVICE: Found CUDA installation: {{.*}}/Inputs/CUDA-nolibdevice/usr/local/cuda // NOCUDA-NOT: Found CUDA installation: // MISSINGLIBDEVICE: error: cannot find libdevice for sm_20. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Driver/sanitizer-ld.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Driver/sanitizer-ld.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Driver/sanitizer-ld.c 2017-10-15 17:43:55.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Driver/sanitizer-ld.c 2017-10-17 14:41:55.000000000 +0000 @@ -508,6 +508,24 @@ // CHECK-CFI-CROSS-DSO-DIAG-LINUX: "-whole-archive" "{{[^"]*}}libclang_rt.cfi_diag-x86_64.a" "-no-whole-archive" // CHECK-CFI-CROSS-DSO-DIAG-LINUX: -export-dynamic +// Cross-DSO CFI on Android does not link runtime libraries. +// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \ +// RUN: -target aarch64-linux-android -fuse-ld=ld \ +// RUN: --sysroot=%S/Inputs/basic_android_tree \ +// RUN: | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-ANDROID %s +// CHECK-CFI-CROSS-DSO-ANDROID: "{{.*}}ld{{(.exe)?}}" +// CHECK-CFI-CROSS-DSO-ANDROID-NOT: libclang_rt. + +// Cross-DSO CFI with diagnostics on Android links just the UBSAN runtime. 
+// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \ +// RUN: -fno-sanitize-trap=cfi -fsanitize-recover=cfi \ +// RUN: -target aarch64-linux-android -fuse-ld=ld \ +// RUN: --sysroot=%S/Inputs/basic_android_tree \ +// RUN: | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-DIAG-ANDROID %s +// CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "{{.*}}ld{{(.exe)?}}" +// CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "{{[^"]*}}libclang_rt.ubsan_standalone-aarch64-android.so" +// CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "-export-dynamic-symbol=__cfi_check" + // RUN: %clangxx -fsanitize=address %s -### -o %t.o 2>&1 \ // RUN: -mmacosx-version-min=10.6 \ // RUN: -target x86_64-apple-darwin13.4.0 -fuse-ld=ld -stdlib=platform \ @@ -596,26 +614,6 @@ // CHECK-SAFESTACK-ANDROID-AARCH64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-SAFESTACK-ANDROID-AARCH64-NOT: libclang_rt.safestack -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target arm-linux-androideabi -fuse-ld=ld -fsanitize=cfi \ -// RUN: --sysroot=%S/Inputs/basic_android_tree \ -// RUN: | FileCheck --check-prefix=CHECK-CFI-ANDROID %s -// -// CHECK-CFI-ANDROID: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-CFI-ANDROID-NOT: libclang_rt.cfi -// CHECK-CFI-ANDROID-NOT: __cfi_check - -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target arm-linux-androideabi -fuse-ld=ld -fsanitize=cfi \ -// RUN: -fsanitize-cfi-cross-dso \ -// RUN: --sysroot=%S/Inputs/basic_android_tree \ -// RUN: | FileCheck --check-prefix=CHECK-CROSSDSO-CFI-ANDROID %s -// -// CHECK-CROSSDSO-CFI-ANDROID: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-CROSSDSO-CFI-ANDROID-NOT: libclang_rt.cfi -// CHECK-CROSSDSO-CFI-ANDROID: -export-dynamic-symbol=__cfi_check -// CHECK-CROSSDSO-CFI-ANDROID-NOT: libclang_rt.cfi - // RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \ // RUN: -target x86_64-scei-ps4 -fuse-ld=ld \ // RUN: -shared \ diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/clang/test/Index/annotate-attribute.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/test/Index/annotate-attribute.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Index/annotate-attribute.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Index/annotate-attribute.cpp 2017-10-17 14:41:55.000000000 +0000 @@ -16,6 +16,12 @@ void methodWithoutAttribute(); }; +template +class __attribute__((annotate("works"))) TemplateTest {}; + +template +int templateFunction(T value) __attribute__((annotate("works"))); + // CHECK: ClassDecl=Test:3:7 (Definition) Extent=[3:1 - 17:2] // CHECK-NEXT: CXXAccessSpecifier=:4:1 (Definition) Extent=[4:1 - 4:8] // CHECK-NEXT: CXXMethod=aMethod:5:51 Extent=[5:3 - 5:60] @@ -31,3 +37,9 @@ // CHECK-NEXT: CompoundStmt= Extent=[12:23 - 12:25] // CHECK-NEXT: CXXAccessSpecifier=:14:1 (Definition) Extent=[14:1 - 14:11] // CHECK-NEXT: CXXMethod=methodWithoutAttribute:16:8 Extent=[16:3 - 16:32] +// CHECK: ClassTemplate=TemplateTest:20:42 (Definition) Extent=[19:1 - 20:57] +// CHECK-NEXT: TemplateTypeParameter=T:19:20 (Definition) Extent=[19:11 - 19:21] [access=public] +// CHECK-NEXT: attribute(annotate)=works Extent=[20:22 - 20:39] +// CHECK: FunctionTemplate=templateFunction:23:5 Extent=[22:1 - 23:65] +// CHECK-NEXT: TemplateTypeParameter=T:22:20 (Definition) Extent=[22:11 - 22:21] [access=public] +// CHECK-NEXT: attribute(annotate)=works Extent=[23:46 - 23:63] diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/OpenMP/is_initial_device.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/OpenMP/is_initial_device.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/OpenMP/is_initial_device.c 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/OpenMP/is_initial_device.c 2017-10-17 14:41:55.000000000 +0000 @@ -0,0 +1,36 @@ +// REQUIRES: powerpc-registered-target + +// RUN: %clang_cc1 -verify -fopenmp -x c -triple 
powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown \ +// RUN: -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x ir -triple powerpc64le-unknown-unknown -emit-llvm \ +// RUN: %t-ppc-host.bc -o - | FileCheck %s -check-prefixes HOST,OUTLINED +// RUN: %clang_cc1 -verify -fopenmp -x c -triple powerpc64le-unknown-unknown -emit-llvm -fopenmp-is-device \ +// RUN: %s -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefixes DEVICE,OUTLINED + +// expected-no-diagnostics +int check() { + int host = omp_is_initial_device(); + int device; +#pragma omp target map(tofrom: device) + { + device = omp_is_initial_device(); + } + + return host + device; +} + +// The host should get a value of 1: +// HOST: define{{.*}} @check() +// HOST: [[HOST:%.*]] = alloca i32 +// HOST: store i32 1, i32* [[HOST]] + +// OUTLINED: define{{.*}} @{{.*}}omp_offloading{{.*}}(i32*{{.*}} [[DEVICE_ARGUMENT:%.*]]) +// OUTLINED: [[DEVICE_ADDR_STORAGE:%.*]] = alloca i32* +// OUTLINED: store i32* [[DEVICE_ARGUMENT]], i32** [[DEVICE_ADDR_STORAGE]] +// OUTLINED: [[DEVICE_ADDR:%.*]] = load i32*, i32** [[DEVICE_ADDR_STORAGE]] + +// The outlined function that is called as fallback also runs on the host: +// HOST: store i32 1, i32* [[DEVICE_ADDR]] + +// The device should get a value of 0: +// DEVICE: store i32 0, i32* [[DEVICE_ADDR]] diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Parser/builtin_types_compatible.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Parser/builtin_types_compatible.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Parser/builtin_types_compatible.c 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Parser/builtin_types_compatible.c 2017-10-17 14:41:56.000000000 +0000 @@ -41,3 +41,20 @@ } +enum E1 { E1Foo }; +enum E2 { E2Foo }; + +static void testGccCompatibility() { + _Static_assert(__builtin_types_compatible_p(const volatile int, int), ""); + 
_Static_assert(__builtin_types_compatible_p(int[5], int[]), ""); + _Static_assert(!__builtin_types_compatible_p(int[5], int[4]), ""); + _Static_assert(!__builtin_types_compatible_p(int *, int **), ""); + _Static_assert(!__builtin_types_compatible_p(const int *, int *), ""); + _Static_assert(!__builtin_types_compatible_p(enum E1, enum E2), ""); + + // GCC's __builtin_types_compatible_p ignores qualifiers on arrays. + _Static_assert(__builtin_types_compatible_p(const int[4], int[4]), ""); + _Static_assert(__builtin_types_compatible_p(int[4], const int[4]), ""); + _Static_assert(__builtin_types_compatible_p(const int[5][4], int[][4]), ""); + _Static_assert(!__builtin_types_compatible_p(const int(*)[], int(*)[]), ""); +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Preprocessor/print-assembler.s llvm-toolchain-snapshot-6.0~svn316003/clang/test/Preprocessor/print-assembler.s --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Preprocessor/print-assembler.s 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Preprocessor/print-assembler.s 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -E -x assembler-with-cpp %s -o - | FileCheck %s --strict-whitespace + +.intel_syntax noprefix +.text + .global _main +_main: +# asdf +# asdf + mov bogus_name, 20 + mov rax, 5 + ret + +// CHECK-LABEL: _main: +// CHECK-NEXT: {{^}} # asdf +// CHECK-NEXT: {{^}} # asdf +// CHECK-NEXT: mov bogus_name, 20 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Refactor/LocalRename/NoSymbolSelectedError.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/test/Refactor/LocalRename/NoSymbolSelectedError.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Refactor/LocalRename/NoSymbolSelectedError.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Refactor/LocalRename/NoSymbolSelectedError.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,8 @@ +// RUN: not clang-refactor 
local-rename -selection=%s:4:1 -new-name=Bar %s -- 2>&1 | FileCheck %s +// RUN: clang-refactor local-rename -selection=test:%s -new-name=Bar %s -- 2>&1 | FileCheck --check-prefix=TESTCHECK %s + +class Baz { // CHECK: [[@LINE]]:1: error: there is no symbol at the given location +}; +/*range=*/; +// TESTCHECK: 1 '' results: +// TESTCHECK-NEXT: there is no symbol at the given location diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Refactor/tool-apply-replacements.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/test/Refactor/tool-apply-replacements.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Refactor/tool-apply-replacements.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Refactor/tool-apply-replacements.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,11 @@ +// RUN: rm -f %t.cp.cpp +// RUN: cp %s %t.cp.cpp +// RUN: clang-refactor local-rename -selection=%t.cp.cpp:9:7 -new-name=test %t.cp.cpp -- +// RUN: grep -v CHECK %t.cp.cpp | FileCheck %t.cp.cpp +// RUN: cp %s %t.cp.cpp +// RUN: clang-refactor local-rename -selection=%t.cp.cpp:9:7-9:15 -new-name=test %t.cp.cpp -- +// RUN: grep -v CHECK %t.cp.cpp | FileCheck %t.cp.cpp + +class RenameMe { +// CHECK: class test { +}; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Refactor/tool-selection-option.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Refactor/tool-selection-option.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Refactor/tool-selection-option.c 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Refactor/tool-selection-option.c 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,15 @@ +// RUN: rm -f %t.cp.c +// RUN: cp %s %t.cp.c +// RUN: clang-refactor local-rename -selection=%t.cp.c:6:5 -new-name=test -v %t.cp.c -- | FileCheck --check-prefix=CHECK1 %s +// RUN: clang-refactor local-rename -selection=%t.cp.c:6:5-6:9 -new-name=test -v %t.cp.c -- | FileCheck --check-prefix=CHECK2 %s + +int 
test; + +// CHECK1: invoking action 'local-rename': +// CHECK1-NEXT: -selection={{.*}}.cp.c:6:5 -> {{.*}}.cp.c:6:5 + +// CHECK2: invoking action 'local-rename': +// CHECK2-NEXT: -selection={{.*}}.cp.c:6:5 -> {{.*}}.cp.c:6:9 + +// RUN: not clang-refactor local-rename -selection=%s:6:5 -new-name=test -v %t.cp.c -- 2>&1 | FileCheck --check-prefix=CHECK-FILE-ERR %s +// CHECK-FILE-ERR: given file is not in the target TU diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Sema/outof-range-constant-compare.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Sema/outof-range-constant-compare.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Sema/outof-range-constant-compare.c 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Sema/outof-range-constant-compare.c 2017-10-17 14:41:56.000000000 +0000 @@ -7,58 +7,6 @@ { int a = value(); - if (a == 0x0000000000000000L) - return 0; - if (a != 0x0000000000000000L) - return 0; - if (a < 0x0000000000000000L) - return 0; - if (a <= 0x0000000000000000L) - return 0; - if (a > 0x0000000000000000L) - return 0; - if (a >= 0x0000000000000000L) - return 0; - - if (0x0000000000000000L == a) - return 0; - if (0x0000000000000000L != a) - return 0; - if (0x0000000000000000L < a) - return 0; - if (0x0000000000000000L <= a) - return 0; - if (0x0000000000000000L > a) - return 0; - if (0x0000000000000000L >= a) - return 0; - - if (a == 0x0000000000000000UL) - return 0; - if (a != 0x0000000000000000UL) - return 0; - if (a < 0x0000000000000000UL) // expected-warning {{comparison of unsigned expression < 0 is always false}} - return 0; - if (a <= 0x0000000000000000UL) - return 0; - if (a > 0x0000000000000000UL) - return 0; - if (a >= 0x0000000000000000UL) // expected-warning {{comparison of unsigned expression >= 0 is always true}} - return 0; - - if (0x0000000000000000UL == a) - return 0; - if (0x0000000000000000UL != a) - return 0; - if (0x0000000000000000UL < a) - return 0; - if (0x0000000000000000UL 
<= a) // expected-warning {{comparison of 0 <= unsigned expression is always true}} - return 0; - if (0x0000000000000000UL > a) // expected-warning {{comparison of 0 > unsigned expression is always false}} - return 0; - if (0x0000000000000000UL >= a) - return 0; - if (a == 0x1234567812345678L) // expected-warning {{comparison of constant 1311768465173141112 with expression of type 'int' is always false}} return 0; if (a != 0x1234567812345678L) // expected-warning {{comparison of constant 1311768465173141112 with expression of type 'int' is always true}} @@ -155,113 +103,6 @@ if (0x1234567812345678L >= l) return 0; - unsigned un = 0; - if (un == 0x0000000000000000L) - return 0; - if (un != 0x0000000000000000L) - return 0; - if (un < 0x0000000000000000L) // expected-warning {{comparison of unsigned expression < 0 is always false}} - return 0; - if (un <= 0x0000000000000000L) - return 0; - if (un > 0x0000000000000000L) - return 0; - if (un >= 0x0000000000000000L) // expected-warning {{comparison of unsigned expression >= 0 is always true}} - return 0; - - if (0x0000000000000000L == un) - return 0; - if (0x0000000000000000L != un) - return 0; - if (0x0000000000000000L < un) - return 0; - if (0x0000000000000000L <= un) // expected-warning {{comparison of 0 <= unsigned expression is always true}} - return 0; - if (0x0000000000000000L > un) // expected-warning {{comparison of 0 > unsigned expression is always false}} - return 0; - if (0x0000000000000000L >= un) - return 0; - - if (un == 0x0000000000000000UL) - return 0; - if (un != 0x0000000000000000UL) - return 0; - if (un < 0x0000000000000000UL) // expected-warning {{comparison of unsigned expression < 0 is always false}} - return 0; - if (un <= 0x0000000000000000UL) - return 0; - if (un > 0x0000000000000000UL) - return 0; - if (un >= 0x0000000000000000UL) // expected-warning {{comparison of unsigned expression >= 0 is always true}} - return 0; - - if (0x0000000000000000UL == un) - return 0; - if (0x0000000000000000UL 
!= un) - return 0; - if (0x0000000000000000UL < un) - return 0; - if (0x0000000000000000UL <= un) // expected-warning {{comparison of 0 <= unsigned expression is always true}} - return 0; - if (0x0000000000000000UL > un) // expected-warning {{comparison of 0 > unsigned expression is always false}} - return 0; - if (0x0000000000000000UL >= un) - return 0; - - float fl = 0; - if (fl == 0x0000000000000000L) - return 0; - if (fl != 0x0000000000000000L) - return 0; - if (fl < 0x0000000000000000L) - return 0; - if (fl <= 0x0000000000000000L) - return 0; - if (fl > 0x0000000000000000L) - return 0; - if (fl >= 0x0000000000000000L) - return 0; - - if (0x0000000000000000L == fl) - return 0; - if (0x0000000000000000L != fl) - return 0; - if (0x0000000000000000L < fl) - return 0; - if (0x0000000000000000L <= fl) - return 0; - if (0x0000000000000000L > fl) - return 0; - if (0x0000000000000000L >= fl) - return 0; - - double dl = 0; - if (dl == 0x0000000000000000L) - return 0; - if (dl != 0x0000000000000000L) - return 0; - if (dl < 0x0000000000000000L) - return 0; - if (dl <= 0x0000000000000000L) - return 0; - if (dl > 0x0000000000000000L) - return 0; - if (dl >= 0x0000000000000000L) - return 0; - - if (0x0000000000000000L == dl) - return 0; - if (0x0000000000000000L != dl) - return 0; - if (0x0000000000000000L < dl) - return 0; - if (0x0000000000000000L <= dl) - return 0; - if (0x0000000000000000L > dl) - return 0; - if (0x0000000000000000L >= dl) - return 0; - enum E { yes, no, diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Sema/tautological-constant-compare.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Sema/tautological-constant-compare.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Sema/tautological-constant-compare.c 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Sema/tautological-constant-compare.c 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,514 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu 
-fsyntax-only -DTEST -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wno-tautological-constant-compare -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -DTEST -verify -x c++ %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wno-tautological-constant-compare -verify -x c++ %s + +int value(void); + +#define macro(val) val + +#ifdef __cplusplus +template +void TFunc() { + // Make sure that we do warn for normal variables in template functions ! + unsigned char c = value(); +#ifdef TEST + if (c > 255) // expected-warning {{comparison 'unsigned char' > 255 is always false}} + return; +#else + if (c > 255) + return; +#endif + + if (c > macro(255)) + return; + + T v = value(); + if (v > 255) + return; + if (v > 32767) + return; +} +#endif + +int main() +{ +#ifdef __cplusplus + TFunc(); + TFunc(); +#endif + + short s = value(); + +#ifdef TEST + if (s == 32767) + return 0; + if (s != 32767) + return 0; + if (s < 32767) + return 0; + if (s <= 32767) // expected-warning {{comparison 'short' <= 32767 is always true}} + return 0; + if (s > 32767) // expected-warning {{comparison 'short' > 32767 is always false}} + return 0; + if (s >= 32767) + return 0; + + if (32767 == s) + return 0; + if (32767 != s) + return 0; + if (32767 < s) // expected-warning {{comparison 32767 < 'short' is always false}} + return 0; + if (32767 <= s) + return 0; + if (32767 > s) + return 0; + if (32767 >= s) // expected-warning {{comparison 32767 >= 'short' is always true}} + return 0; + + // FIXME: assumes two's complement + if (s == -32768) + return 0; + if (s != -32768) + return 0; + if (s < -32768) // expected-warning {{comparison 'short' < -32768 is always false}} + return 0; + if (s <= -32768) + return 0; + if (s > -32768) + return 0; + if (s >= -32768) // expected-warning {{comparison 'short' >= -32768 is always true}} + return 0; + + if (-32768 == s) + return 0; + if (-32768 != s) + return 0; + if (-32768 < s) + return 0; + if (-32768 <= 
s) // expected-warning {{comparison -32768 <= 'short' is always true}} + return 0; + if (-32768 > s) // expected-warning {{comparison -32768 > 'short' is always false}} + return 0; + if (-32768 >= s) + return 0; + + if (s == 32767UL) + return 0; + if (s != 32767UL) + return 0; + if (s < 32767UL) + return 0; + if (s <= 32767UL) // expected-warning {{comparison 'short' <= 32767 is always true}} + return 0; + if (s > 32767UL) // expected-warning {{comparison 'short' > 32767 is always false}} + return 0; + if (s >= 32767UL) + return 0; + + if (32767UL == s) + return 0; + if (32767UL != s) + return 0; + if (32767UL < s) // expected-warning {{comparison 32767 < 'short' is always false}} + return 0; + if (32767UL <= s) + return 0; + if (32767UL > s) + return 0; + if (32767UL >= s) // expected-warning {{comparison 32767 >= 'short' is always true}} + return 0; + + // FIXME: assumes two's complement + if (s == -32768L) + return 0; + if (s != -32768L) + return 0; + if (s < -32768L) // expected-warning {{comparison 'short' < -32768 is always false}} + return 0; + if (s <= -32768L) + return 0; + if (s > -32768L) + return 0; + if (s >= -32768L) // expected-warning {{comparison 'short' >= -32768 is always true}} + return 0; + + if (-32768L == s) + return 0; + if (-32768L != s) + return 0; + if (-32768L < s) + return 0; + if (-32768L <= s) // expected-warning {{comparison -32768 <= 'short' is always true}} + return 0; + if (-32768L > s) // expected-warning {{comparison -32768 > 'short' is always false}} + return 0; + if (-32768L >= s) + return 0; +#else + // expected-no-diagnostics + if (s == 32767) + return 0; + if (s != 32767) + return 0; + if (s < 32767) + return 0; + if (s <= 32767) + return 0; + if (s > 32767) + return 0; + if (s >= 32767) + return 0; + + if (32767 == s) + return 0; + if (32767 != s) + return 0; + if (32767 < s) + return 0; + if (32767 <= s) + return 0; + if (32767 > s) + return 0; + if (32767 >= s) + return 0; + + // FIXME: assumes two's complement + if (s 
== -32768) + return 0; + if (s != -32768) + return 0; + if (s < -32768) + return 0; + if (s <= -32768) + return 0; + if (s > -32768) + return 0; + if (s >= -32768) + return 0; + + if (-32768 == s) + return 0; + if (-32768 != s) + return 0; + if (-32768 < s) + return 0; + if (-32768 <= s) + return 0; + if (-32768 > s) + return 0; + if (-32768 >= s) + return 0; + + if (s == 32767UL) + return 0; + if (s != 32767UL) + return 0; + if (s < 32767UL) + return 0; + if (s <= 32767UL) + return 0; + if (s > 32767UL) + return 0; + if (s >= 32767UL) + return 0; + + if (32767UL == s) + return 0; + if (32767UL != s) + return 0; + if (32767UL < s) + return 0; + if (32767UL <= s) + return 0; + if (32767UL > s) + return 0; + if (32767UL >= s) + return 0; + + // FIXME: assumes two's complement + if (s == -32768L) + return 0; + if (s != -32768L) + return 0; + if (s < -32768L) + return 0; + if (s <= -32768L) + return 0; + if (s > -32768L) + return 0; + if (s >= -32768L) + return 0; + + if (-32768L == s) + return 0; + if (-32768L != s) + return 0; + if (-32768L < s) + return 0; + if (-32768L <= s) + return 0; + if (-32768L > s) + return 0; + if (-32768L >= s) + return 0; +#endif + + if (s == 0) + return 0; + if (s != 0) + return 0; + if (s < 0) + return 0; + if (s <= 0) + return 0; + if (s > 0) + return 0; + if (s >= 0) + return 0; + + if (0 == s) + return 0; + if (0 != s) + return 0; + if (0 < s) + return 0; + if (0 <= s) + return 0; + if (0 > s) + return 0; + if (0 >= s) + return 0; + + // However the comparison with 0U would warn + + unsigned short us = value(); + +#ifdef TEST + if (us == 65535) + return 0; + if (us != 65535) + return 0; + if (us < 65535) + return 0; + if (us <= 65535) // expected-warning {{comparison 'unsigned short' <= 65535 is always true}} + return 0; + if (us > 65535) // expected-warning {{comparison 'unsigned short' > 65535 is always false}} + return 0; + if (us >= 65535) + return 0; + + if (65535 == us) + return 0; + if (65535 != us) + return 0; + if (65535 < 
us) // expected-warning {{comparison 65535 < 'unsigned short' is always false}} + return 0; + if (65535 <= us) + return 0; + if (65535 > us) + return 0; + if (65535 >= us) // expected-warning {{comparison 65535 >= 'unsigned short' is always true}} + return 0; + + if (us == 65535UL) + return 0; + if (us != 65535UL) + return 0; + if (us < 65535UL) + return 0; + if (us <= 65535UL) // expected-warning {{comparison 'unsigned short' <= 65535 is always true}} + return 0; + if (us > 65535UL) // expected-warning {{comparison 'unsigned short' > 65535 is always false}} + return 0; + if (us >= 65535UL) + return 0; + + if (65535UL == us) + return 0; + if (65535UL != us) + return 0; + if (65535UL < us) // expected-warning {{comparison 65535 < 'unsigned short' is always false}} + return 0; + if (65535UL <= us) + return 0; + if (65535UL > us) + return 0; + if (65535UL >= us) // expected-warning {{comparison 65535 >= 'unsigned short' is always true}} + return 0; +#else + // expected-no-diagnostics + if (us == 65535) + return 0; + if (us != 65535) + return 0; + if (us < 65535) + return 0; + if (us <= 65535) + return 0; + if (us > 65535) + return 0; + if (us >= 65535) + return 0; + + if (65535 == us) + return 0; + if (65535 != us) + return 0; + if (65535 < us) + return 0; + if (65535 <= us) + return 0; + if (65535 > us) + return 0; + if (65535 >= us) + return 0; + + if (us == 65535UL) + return 0; + if (us != 65535UL) + return 0; + if (us < 65535UL) + return 0; + if (us <= 65535UL) + return 0; + if (us > 65535UL) + return 0; + if (us >= 65535UL) + return 0; + + if (65535UL == us) + return 0; + if (65535UL != us) + return 0; + if (65535UL < us) + return 0; + if (65535UL <= us) + return 0; + if (65535UL > us) + return 0; + if (65535UL >= us) + return 0; +#endif + + if (us == 32767) + return 0; + if (us != 32767) + return 0; + if (us < 32767) + return 0; + if (us <= 32767) + return 0; + if (us > 32767) + return 0; + if (us >= 32767) + return 0; + + if (32767 == us) + return 0; + if 
(32767 != us) + return 0; + if (32767 < us) + return 0; + if (32767 <= us) + return 0; + if (32767 > us) + return 0; + if (32767 >= us) + return 0; + + if (us == 32767UL) + return 0; + if (us != 32767UL) + return 0; + if (us < 32767UL) + return 0; + if (us <= 32767UL) + return 0; + if (us > 32767UL) + return 0; + if (us >= 32767UL) + return 0; + + if (32767UL == us) + return 0; + if (32767UL != us) + return 0; + if (32767UL < us) + return 0; + if (32767UL <= us) + return 0; + if (32767UL > us) + return 0; + if (32767UL >= us) + return 0; + +#if __SIZEOF_INT128__ + __int128 i128; + if (i128 == -1) // used to crash + return 0; +#endif + + + enum E { + yes, + no, + maybe + }; + enum E e; + + if (e == yes) + return 0; + if (e != yes) + return 0; + if (e < yes) + return 0; + if (e <= yes) + return 0; + if (e > yes) + return 0; + if (e >= yes) + return 0; + + if (yes == e) + return 0; + if (yes != e) + return 0; + if (yes < e) + return 0; + if (yes <= e) + return 0; + if (yes > e) + return 0; + if (yes >= e) + return 0; + + if (e == maybe) + return 0; + if (e != maybe) + return 0; + if (e < maybe) + return 0; + if (e <= maybe) + return 0; + if (e > maybe) + return 0; + if (e >= maybe) + return 0; + + if (maybe == e) + return 0; + if (maybe != e) + return 0; + if (maybe < e) + return 0; + if (maybe <= e) + return 0; + if (maybe > e) + return 0; + if (maybe >= e) + return 0; + + return 1; +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/test/Sema/tautological-unsigned-zero-compare.c llvm-toolchain-snapshot-6.0~svn316003/clang/test/Sema/tautological-unsigned-zero-compare.c --- llvm-toolchain-snapshot-6.0~svn315865/clang/test/Sema/tautological-unsigned-zero-compare.c 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/test/Sema/tautological-unsigned-zero-compare.c 2017-10-17 14:41:56.000000000 +0000 @@ -1,47 +1,370 @@ // RUN: %clang_cc1 -fsyntax-only -DTEST -verify %s // RUN: %clang_cc1 -fsyntax-only 
-Wno-tautological-unsigned-zero-compare -verify %s +// RUN: %clang_cc1 -fsyntax-only -DTEST -verify -x c++ %s +// RUN: %clang_cc1 -fsyntax-only -Wno-tautological-unsigned-zero-compare -verify -x c++ %s -unsigned value(void); +unsigned uvalue(void); +signed int svalue(void); -int main() { - unsigned un = value(); +#define macro(val) val +#ifdef __cplusplus +template +void TFunc() { + // Make sure that we do warn for normal variables in template functions ! + unsigned char c = svalue(); #ifdef TEST + if (c < 0) // expected-warning {{comparison of unsigned expression < 0 is always false}} + return; +#else + if (c < 0) + return; +#endif + + if (c < macro(0)) + return; + + T v = svalue(); + if (v < 0) + return; +} +#endif + +int main() +{ +#ifdef __cplusplus + TFunc(); + TFunc(); +#endif + + unsigned un = uvalue(); + +#ifdef TEST + if (un == 0) + return 0; + if (un != 0) + return 0; if (un < 0) // expected-warning {{comparison of unsigned expression < 0 is always false}} - return 0; + return 0; + if (un <= 0) + return 0; + if (un > 0) + return 0; if (un >= 0) // expected-warning {{comparison of unsigned expression >= 0 is always true}} - return 0; + return 0; + + if (0 == un) + return 0; + if (0 != un) + return 0; + if (0 < un) + return 0; if (0 <= un) // expected-warning {{comparison of 0 <= unsigned expression is always true}} - return 0; + return 0; if (0 > un) // expected-warning {{comparison of 0 > unsigned expression is always false}} - return 0; - if (un < 0U) // expected-warning {{comparison of unsigned expression < 0 is always false}} - return 0; - if (un >= 0U) // expected-warning {{comparison of unsigned expression >= 0 is always true}} - return 0; - if (0U <= un) // expected-warning {{comparison of 0 <= unsigned expression is always true}} - return 0; - if (0U > un) // expected-warning {{comparison of 0 > unsigned expression is always false}} - return 0; + return 0; + if (0 >= un) + return 0; + + if (un == 0UL) + return 0; + if (un != 0UL) + return 0; + if 
(un < 0UL) // expected-warning {{comparison of unsigned expression < 0 is always false}} + return 0; + if (un <= 0UL) + return 0; + if (un > 0UL) + return 0; + if (un >= 0UL) // expected-warning {{comparison of unsigned expression >= 0 is always true}} + return 0; + + if (0UL == un) + return 0; + if (0UL != un) + return 0; + if (0UL < un) + return 0; + if (0UL <= un) // expected-warning {{comparison of 0 <= unsigned expression is always true}} + return 0; + if (0UL > un) // expected-warning {{comparison of 0 > unsigned expression is always false}} + return 0; + if (0UL >= un) + return 0; #else // expected-no-diagnostics + if (un == 0) + return 0; + if (un != 0) + return 0; if (un < 0) - return 0; + return 0; + if (un <= 0) + return 0; + if (un > 0) + return 0; if (un >= 0) - return 0; + return 0; + + if (0 == un) + return 0; + if (0 != un) + return 0; + if (0 < un) + return 0; if (0 <= un) - return 0; + return 0; if (0 > un) - return 0; - if (un < 0U) - return 0; - if (un >= 0U) - return 0; - if (0U <= un) - return 0; - if (0U > un) - return 0; + return 0; + if (0 >= un) + return 0; + + if (un == 0UL) + return 0; + if (un != 0UL) + return 0; + if (un < 0UL) + return 0; + if (un <= 0UL) + return 0; + if (un > 0UL) + return 0; + if (un >= 0UL) + return 0; + + if (0UL == un) + return 0; + if (0UL != un) + return 0; + if (0UL < un) + return 0; + if (0UL <= un) + return 0; + if (0UL > un) + return 0; + if (0UL >= un) + return 0; +#endif + + + signed int a = svalue(); + +#ifdef TEST + if (a == 0) + return 0; + if (a != 0) + return 0; + if (a < 0) + return 0; + if (a <= 0) + return 0; + if (a > 0) + return 0; + if (a >= 0) + return 0; + + if (0 == a) + return 0; + if (0 != a) + return 0; + if (0 < a) + return 0; + if (0 <= a) + return 0; + if (0 > a) + return 0; + if (0 >= a) + return 0; + + if (a == 0UL) + return 0; + if (a != 0UL) + return 0; + if (a < 0UL) // expected-warning {{comparison of unsigned expression < 0 is always false}} + return 0; + if (a <= 0UL) + return 
0; + if (a > 0UL) + return 0; + if (a >= 0UL) // expected-warning {{comparison of unsigned expression >= 0 is always true}} + return 0; + + if (0UL == a) + return 0; + if (0UL != a) + return 0; + if (0UL < a) + return 0; + if (0UL <= a) // expected-warning {{comparison of 0 <= unsigned expression is always true}} + return 0; + if (0UL > a) // expected-warning {{comparison of 0 > unsigned expression is always false}} + return 0; + if (0UL >= a) + return 0; +#else +// expected-no-diagnostics + if (a == 0) + return 0; + if (a != 0) + return 0; + if (a < 0) + return 0; + if (a <= 0) + return 0; + if (a > 0) + return 0; + if (a >= 0) + return 0; + + if (0 == a) + return 0; + if (0 != a) + return 0; + if (0 < a) + return 0; + if (0 <= a) + return 0; + if (0 > a) + return 0; + if (0 >= a) + return 0; + + if (a == 0UL) + return 0; + if (a != 0UL) + return 0; + if (a < 0UL) + return 0; + if (a <= 0UL) + return 0; + if (a > 0UL) + return 0; + if (a >= 0UL) + return 0; + + if (0UL == a) + return 0; + if (0UL != a) + return 0; + if (0UL < a) + return 0; + if (0UL <= a) + return 0; + if (0UL > a) + return 0; + if (0UL >= a) + return 0; #endif + + float fl = 0; + + if (fl == 0) + return 0; + if (fl != 0) + return 0; + if (fl < 0) + return 0; + if (fl <= 0) + return 0; + if (fl > 0) + return 0; + if (fl >= 0) + return 0; + + if (0 == fl) + return 0; + if (0 != fl) + return 0; + if (0 < fl) + return 0; + if (0 <= fl) + return 0; + if (0 > fl) + return 0; + if (0 >= fl) + return 0; + + if (fl == 0UL) + return 0; + if (fl != 0UL) + return 0; + if (fl < 0UL) + return 0; + if (fl <= 0UL) + return 0; + if (fl > 0UL) + return 0; + if (fl >= 0UL) + return 0; + + if (0UL == fl) + return 0; + if (0UL != fl) + return 0; + if (0UL < fl) + return 0; + if (0UL <= fl) + return 0; + if (0UL > fl) + return 0; + if (0UL >= fl) + return 0; + + + double dl = 0; + + if (dl == 0) + return 0; + if (dl != 0) + return 0; + if (dl < 0) + return 0; + if (dl <= 0) + return 0; + if (dl > 0) + return 0; + if 
(dl >= 0) + return 0; + + if (0 == dl) + return 0; + if (0 != dl) + return 0; + if (0 < dl) + return 0; + if (0 <= dl) + return 0; + if (0 > dl) + return 0; + if (0 >= dl) + return 0; + + if (dl == 0UL) + return 0; + if (dl != 0UL) + return 0; + if (dl < 0UL) + return 0; + if (dl <= 0UL) + return 0; + if (dl > 0UL) + return 0; + if (dl >= 0UL) + return 0; + + if (0UL == dl) + return 0; + if (0UL != dl) + return 0; + if (0UL < dl) + return 0; + if (0UL <= dl) + return 0; + if (0UL > dl) + return 0; + if (0UL >= dl) + return 0; + return 1; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/tools/clang-refactor/ClangRefactor.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/tools/clang-refactor/ClangRefactor.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/tools/clang-refactor/ClangRefactor.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/tools/clang-refactor/ClangRefactor.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// #include "TestSupport.h" +#include "clang/Frontend/CommandLineSourceLoc.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" #include "clang/Rewrite/Core/Rewriter.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Refactoring.h" @@ -54,7 +56,7 @@ /// Prints any additional state associated with the selection argument to /// the given output stream. - virtual void print(raw_ostream &OS) = 0; + virtual void print(raw_ostream &OS) {} /// Returns a replacement refactoring result consumer (if any) that should /// consume the results of a refactoring operation. @@ -64,7 +66,8 @@ /// logic into the refactoring operation. The test-specific consumer /// ensures that the individual results in a particular test group are /// identical. 
- virtual std::unique_ptr createCustomConsumer() { + virtual std::unique_ptr + createCustomConsumer() { return nullptr; } @@ -84,7 +87,8 @@ void print(raw_ostream &OS) override { TestSelections.dump(OS); } - std::unique_ptr createCustomConsumer() override { + std::unique_ptr + createCustomConsumer() override { return TestSelections.createConsumer(); } @@ -99,6 +103,41 @@ TestSelectionRangesInFile TestSelections; }; +/// Stores the parsed -selection=filename:line:column[-line:column] option. +class SourceRangeSelectionArgument final : public SourceSelectionArgument { +public: + SourceRangeSelectionArgument(ParsedSourceRange Range) + : Range(std::move(Range)) {} + + bool forAllRanges(const SourceManager &SM, + llvm::function_ref Callback) override { + const FileEntry *FE = SM.getFileManager().getFile(Range.FileName); + FileID FID = FE ? SM.translateFile(FE) : FileID(); + if (!FE || FID.isInvalid()) { + llvm::errs() << "error: -selection=" << Range.FileName + << ":... : given file is not in the target TU\n"; + return true; + } + + SourceLocation Start = SM.getMacroArgExpandedLocation( + SM.translateLineCol(FID, Range.Begin.first, Range.Begin.second)); + SourceLocation End = SM.getMacroArgExpandedLocation( + SM.translateLineCol(FID, Range.End.first, Range.End.second)); + if (Start.isInvalid() || End.isInvalid()) { + llvm::errs() << "error: -selection=" << Range.FileName << ':' + << Range.Begin.first << ':' << Range.Begin.second << '-' + << Range.End.first << ':' << Range.End.second + << " : invalid source location\n"; + return true; + } + Callback(SourceRange(Start, End)); + return false; + } + +private: + ParsedSourceRange Range; +}; + std::unique_ptr SourceSelectionArgument::fromString(StringRef Value) { if (Value.startswith("test:")) { @@ -110,10 +149,12 @@ return llvm::make_unique( std::move(*ParsedTestSelection)); } - // FIXME: Support true selection ranges. 
+ Optional Range = ParsedSourceRange::fromString(Value); + if (Range) + return llvm::make_unique(std::move(*Range)); llvm::errs() << "error: '-selection' option must be specified using " ":: or " - "::-: format"; + "::-: format\n"; return nullptr; } @@ -266,13 +307,34 @@ RefactoringActionCommandLineOptions Options; }; -class ClangRefactorConsumer : public RefactoringResultConsumer { +class ClangRefactorConsumer final : public ClangRefactorToolConsumerInterface { public: - void handleError(llvm::Error Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + ClangRefactorConsumer() {} + + void handleError(llvm::Error Err) override { + Optional Diag = DiagnosticError::take(Err); + if (!Diag) { + llvm::errs() << llvm::toString(std::move(Err)) << "\n"; + return; + } + llvm::cantFail(std::move(Err)); // This is a success. + DiagnosticBuilder DB( + getDiags().Report(Diag->first, Diag->second.getDiagID())); + Diag->second.Emit(DB); } - // FIXME: Consume atomic changes and apply them to files. + void handle(AtomicChanges Changes) override { + SourceChanges.insert(SourceChanges.begin(), Changes.begin(), Changes.end()); + } + + void handle(SymbolOccurrences Occurrences) override { + llvm_unreachable("symbol occurrence results are not handled yet"); + } + + const AtomicChanges &getSourceChanges() const { return SourceChanges; } + +private: + AtomicChanges SourceChanges; }; class ClangRefactorTool { @@ -352,6 +414,39 @@ } } + bool applySourceChanges(const AtomicChanges &Replacements) { + std::set Files; + for (const auto &Change : Replacements) + Files.insert(Change.getFilePath()); + // FIXME: Add automatic formatting support as well. + tooling::ApplyChangesSpec Spec; + // FIXME: We should probably cleanup the result by default as well. 
+ Spec.Cleanup = false; + for (const auto &File : Files) { + llvm::ErrorOr> BufferErr = + llvm::MemoryBuffer::getFile(File); + if (!BufferErr) { + llvm::errs() << "error: failed to open " << File << " for rewriting\n"; + return true; + } + auto Result = tooling::applyAtomicChanges(File, (*BufferErr)->getBuffer(), + Replacements, Spec); + if (!Result) { + llvm::errs() << toString(Result.takeError()); + return true; + } + + std::error_code EC; + llvm::raw_fd_ostream OS(File, EC, llvm::sys::fs::F_Text); + if (EC) { + llvm::errs() << EC.message() << "\n"; + return true; + } + OS << *Result; + } + return false; + } + bool invokeAction(RefactoringActionSubcommand &Subcommand, const CompilationDatabase &DB, ArrayRef Sources) { @@ -386,8 +481,8 @@ return true; } - bool HasFailed = false; ClangRefactorConsumer Consumer; + bool HasFailed = false; if (foreachTranslationUnit(DB, Sources, [&](ASTContext &AST) { RefactoringRuleContext Context(AST.getSourceManager()); Context.setASTContext(AST); @@ -406,24 +501,30 @@ "The action must have at least one selection rule"); }; + std::unique_ptr CustomConsumer; + if (HasSelection) + CustomConsumer = Subcommand.getSelection()->createCustomConsumer(); + ClangRefactorToolConsumerInterface &ActiveConsumer = + CustomConsumer ? *CustomConsumer : Consumer; + ActiveConsumer.beginTU(AST); if (HasSelection) { assert(Subcommand.getSelection() && "Missing selection argument?"); if (opts::Verbose) Subcommand.getSelection()->print(llvm::outs()); - auto CustomConsumer = - Subcommand.getSelection()->createCustomConsumer(); if (Subcommand.getSelection()->forAllRanges( Context.getSources(), [&](SourceRange R) { Context.setSelectionRange(R); - InvokeRule(CustomConsumer ? *CustomConsumer : Consumer); + InvokeRule(ActiveConsumer); })) HasFailed = true; + ActiveConsumer.endTU(); return; } // FIXME (Alex L): Implement non-selection based invocation path. 
+ ActiveConsumer.endTU(); })) return true; - return HasFailed; + return HasFailed || applySourceChanges(Consumer.getSourceChanges()); } }; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/tools/clang-refactor/TestSupport.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/tools/clang-refactor/TestSupport.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/tools/clang-refactor/TestSupport.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/tools/clang-refactor/TestSupport.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "TestSupport.h" +#include "clang/Basic/DiagnosticError.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/STLExtras.h" @@ -106,7 +107,7 @@ } class TestRefactoringResultConsumer final - : public tooling::RefactoringResultConsumer { + : public ClangRefactorToolConsumerInterface { public: TestRefactoringResultConsumer(const TestSelectionRangesInFile &TestRanges) : TestRanges(TestRanges) { @@ -182,10 +183,15 @@ std::string ErrorMessage; bool HasResult = !!Result; if (!HasResult) { - // FIXME: Handle diagnostic error as well. 
- handleAllErrors(Result.takeError(), [&](StringError &Err) { - ErrorMessage = Err.getMessage(); - }); + handleAllErrors( + Result.takeError(), + [&](StringError &Err) { ErrorMessage = Err.getMessage(); }, + [&](DiagnosticError &Err) { + const PartialDiagnosticAt &Diag = Err.getDiagnostic(); + llvm::SmallString<100> DiagText; + Diag.second.EmitToString(getDiags(), DiagText); + ErrorMessage = DiagText.str().str(); + }); } if (!CanonicalResult && !CanonicalErrorMessage) { if (HasResult) @@ -248,7 +254,7 @@ return Failed; } -std::unique_ptr +std::unique_ptr TestSelectionRangesInFile::createConsumer() const { return llvm::make_unique(*this); } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/tools/clang-refactor/TestSupport.h llvm-toolchain-snapshot-6.0~svn316003/clang/tools/clang-refactor/TestSupport.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/tools/clang-refactor/TestSupport.h 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/tools/clang-refactor/TestSupport.h 2017-10-17 14:41:56.000000000 +0000 @@ -16,9 +16,9 @@ #ifndef LLVM_CLANG_TOOLS_CLANG_REFACTOR_TEST_SUPPORT_H #define LLVM_CLANG_TOOLS_CLANG_REFACTOR_TEST_SUPPORT_H +#include "ToolRefactoringResultConsumer.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" -#include "clang/Tooling/Refactoring/RefactoringResultConsumer.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Error.h" @@ -65,7 +65,7 @@ bool foreachRange(const SourceManager &SM, llvm::function_ref Callback) const; - std::unique_ptr createConsumer() const; + std::unique_ptr createConsumer() const; void dump(llvm::raw_ostream &OS) const; }; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/tools/clang-refactor/ToolRefactoringResultConsumer.h llvm-toolchain-snapshot-6.0~svn316003/clang/tools/clang-refactor/ToolRefactoringResultConsumer.h --- llvm-toolchain-snapshot-6.0~svn315865/clang/tools/clang-refactor/ToolRefactoringResultConsumer.h 1970-01-01 
00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/tools/clang-refactor/ToolRefactoringResultConsumer.h 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,48 @@ +//===--- ToolRefactoringResultConsumer.h - ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_CLANG_REFACTOR_TOOL_REFACTORING_RESULT_CONSUMER_H +#define LLVM_CLANG_TOOLS_CLANG_REFACTOR_TOOL_REFACTORING_RESULT_CONSUMER_H + +#include "clang/AST/ASTContext.h" +#include "clang/Tooling/Refactoring/RefactoringResultConsumer.h" + +namespace clang { +namespace refactor { + +/// An interface that subclasses the \c RefactoringResultConsumer interface +/// that stores the reference to the TU-specific diagnostics engine. +class ClangRefactorToolConsumerInterface + : public tooling::RefactoringResultConsumer { +public: + /// Called when a TU is entered. + void beginTU(ASTContext &Context) { + assert(!Diags && "Diags has been set"); + Diags = &Context.getDiagnostics(); + } + + /// Called when the tool is done with a TU. 
+ void endTU() { + assert(Diags && "Diags unset"); + Diags = nullptr; + } + + DiagnosticsEngine &getDiags() const { + assert(Diags && "no diags"); + return *Diags; + } + +private: + DiagnosticsEngine *Diags = nullptr; +}; + +} // end namespace refactor +} // end namespace clang + +#endif // LLVM_CLANG_TOOLS_CLANG_REFACTOR_TOOL_REFACTORING_RESULT_CONSUMER_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/tools/diagtool/DiagnosticNames.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/tools/diagtool/DiagnosticNames.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/tools/diagtool/DiagnosticNames.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/tools/diagtool/DiagnosticNames.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -42,6 +42,7 @@ #include "clang/Basic/DiagnosticCommentKinds.inc" #include "clang/Basic/DiagnosticSemaKinds.inc" #include "clang/Basic/DiagnosticAnalysisKinds.inc" +#include "clang/Basic/DiagnosticRefactoringKinds.inc" #undef DIAG }; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/tools/libclang/CIndex.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/tools/libclang/CIndex.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/tools/libclang/CIndex.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/tools/libclang/CIndex.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -907,7 +907,8 @@ if (VisitTemplateParameters(D->getTemplateParameters())) return true; - return VisitFunctionDecl(D->getTemplatedDecl()); + auto* FD = D->getTemplatedDecl(); + return VisitAttributes(FD) || VisitFunctionDecl(FD); } bool CursorVisitor::VisitClassTemplateDecl(ClassTemplateDecl *D) { @@ -916,7 +917,8 @@ if (VisitTemplateParameters(D->getTemplateParameters())) return true; - return VisitCXXRecordDecl(D->getTemplatedDecl()); + auto* CD = D->getTemplatedDecl(); + return VisitAttributes(CD) || VisitCXXRecordDecl(CD); } bool CursorVisitor::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) { diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/clang/tools/libclang/CXType.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/tools/libclang/CXType.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/tools/libclang/CXType.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/tools/libclang/CXType.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -403,7 +403,10 @@ if (T.getAddressSpace() >= LangAS::FirstTargetAddressSpace) { return T.getQualifiers().getAddressSpaceAttributePrintValue(); } - return T.getAddressSpace(); + // FIXME: this function returns either a LangAS or a target AS + // Those values can overlap which makes this function rather unpredictable + // for any caller + return (unsigned)T.getAddressSpace(); } CXString clang_getTypedefName(CXType CT) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Format/FormatTestComments.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Format/FormatTestComments.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Format/FormatTestComments.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Format/FormatTestComments.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -2407,6 +2407,57 @@ getLLVMStyleWithColumns(15))); } +TEST_F(FormatTestComments, BreaksAfterMultilineBlockCommentsInParamLists) { + EXPECT_EQ("a = f(/* long\n" + " long\n" + " */\n" + " a);", + format("a = f(/* long long */ a);", getLLVMStyleWithColumns(15))); + + EXPECT_EQ("a = f(/* long\n" + " long\n" + " */\n" + " a);", + format("a = f(/* long\n" + " long\n" + " */a);", + getLLVMStyleWithColumns(15))); + + EXPECT_EQ("a = f(/* long\n" + " long\n" + " */\n" + " a);", + format("a = f(/* long\n" + " long\n" + " */ a);", + getLLVMStyleWithColumns(15))); + + EXPECT_EQ("a = f(/* long\n" + " long\n" + " */\n" + " (1 + 1));", + format("a = f(/* long\n" + " long\n" + " */ (1 + 1));", + getLLVMStyleWithColumns(15))); + + EXPECT_EQ( + "a = f(a,\n" + " /* long\n" + " long\n" + " */\n" 
+ " b);", + format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(15))); + + EXPECT_EQ( + "a = f(a,\n" + " /* long\n" + " long\n" + " */\n" + " (1 + 1));", + format("a = f(a, /* long long */ (1 + 1));", getLLVMStyleWithColumns(15))); +} + TEST_F(FormatTestComments, IndentLineCommentsInStartOfBlockAtEndOfFile) { verifyFormat("{\n" " // a\n" @@ -2805,6 +2856,22 @@ getLLVMStyleWithColumns(80))); // clang-format on } + +TEST_F(FormatTestComments, NonTrailingBlockComments) { + verifyFormat("const /** comment comment */ A = B;", + getLLVMStyleWithColumns(40)); + + verifyFormat("const /** comment comment comment */ A =\n" + " B;", + getLLVMStyleWithColumns(40)); + + EXPECT_EQ("const /** comment comment comment\n" + " comment */\n" + " A = B;", + format("const /** comment comment comment comment */\n" + " A = B;", + getLLVMStyleWithColumns(40))); +} } // end namespace } // end namespace format } // end namespace clang diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Format/FormatTestJS.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Format/FormatTestJS.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Format/FormatTestJS.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Format/FormatTestJS.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -65,6 +65,27 @@ TEST_F(FormatTestJS, BlockComments) { verifyFormat("/* aaaaaaaaaaaaa */ aaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);"); + // Breaks after a single line block comment. + EXPECT_EQ("aaaaa = bbbb.ccccccccccccccc(\n" + " /** @type_{!cccc.rrrrrrr.MMMMMMMMMMMM.LLLLLLLLLLL.lala} */\n" + " mediaMessage);", + format("aaaaa = bbbb.ccccccccccccccc(\n" + " /** " + "@type_{!cccc.rrrrrrr.MMMMMMMMMMMM.LLLLLLLLLLL.lala} */ " + "mediaMessage);", + getGoogleJSStyleWithColumns(70))); + // Breaks after a multiline block comment. 
+ EXPECT_EQ( + "aaaaa = bbbb.ccccccccccccccc(\n" + " /**\n" + " * @type_{!cccc.rrrrrrr.MMMMMMMMMMMM.LLLLLLLLLLL.lala}\n" + " */\n" + " mediaMessage);", + format("aaaaa = bbbb.ccccccccccccccc(\n" + " /**\n" + " * @type_{!cccc.rrrrrrr.MMMMMMMMMMMM.LLLLLLLLLLL.lala}\n" + " */ mediaMessage);", + getGoogleJSStyleWithColumns(70))); } TEST_F(FormatTestJS, JSDocComments) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Frontend/CMakeLists.txt llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Frontend/CMakeLists.txt --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Frontend/CMakeLists.txt 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Frontend/CMakeLists.txt 2017-10-17 14:41:56.000000000 +0000 @@ -7,6 +7,7 @@ CompilerInstanceTest.cpp FrontendActionTest.cpp CodeGenActionTest.cpp + ParsedSourceLocationTest.cpp PCHPreambleTest.cpp ) target_link_libraries(FrontendTests diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Frontend/ParsedSourceLocationTest.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Frontend/ParsedSourceLocationTest.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Frontend/ParsedSourceLocationTest.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Frontend/ParsedSourceLocationTest.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,37 @@ +//===- unittests/Frontend/ParsedSourceLocationTest.cpp - ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Frontend/CommandLineSourceLoc.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace { + +TEST(ParsedSourceRange, ParseTest) { + auto Check = [](StringRef Value, StringRef Filename, unsigned BeginLine, + unsigned BeginColumn, unsigned EndLine, unsigned EndColumn) { + Optional PSR = ParsedSourceRange::fromString(Value); + ASSERT_TRUE(PSR); + EXPECT_EQ(PSR->FileName, Filename); + EXPECT_EQ(PSR->Begin.first, BeginLine); + EXPECT_EQ(PSR->Begin.second, BeginColumn); + EXPECT_EQ(PSR->End.first, EndLine); + EXPECT_EQ(PSR->End.second, EndColumn); + }; + + Check("/Users/test/a-b.cpp:1:2", "/Users/test/a-b.cpp", 1, 2, 1, 2); + Check("/Users/test/a-b.cpp:1:2-3:4", "/Users/test/a-b.cpp", 1, 2, 3, 4); + + Check("C:/Users/bob/a-b.cpp:1:2", "C:/Users/bob/a-b.cpp", 1, 2, 1, 2); + Check("C:/Users/bob/a-b.cpp:1:2-3:4", "C:/Users/bob/a-b.cpp", 1, 2, 3, 4); +} + +} // anonymous namespace diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Rename/CMakeLists.txt llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Rename/CMakeLists.txt --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Rename/CMakeLists.txt 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Rename/CMakeLists.txt 2017-10-17 14:41:56.000000000 +0000 @@ -7,6 +7,8 @@ add_clang_unittest(ClangRenameTests RenameClassTest.cpp + RenameEnumTest.cpp + RenameFunctionTest.cpp ) target_link_libraries(ClangRenameTests diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Rename/RenameClassTest.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Rename/RenameClassTest.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Rename/RenameClassTest.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Rename/RenameClassTest.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -51,6 
+51,7 @@ testing::ValuesIn(std::vector({ // basic classes {"a::Foo f;", "b::Bar f;", "", ""}, + {"::a::Foo f;", "::b::Bar f;", "", ""}, {"void f(a::Foo f) {}", "void f(b::Bar f) {}", "", ""}, {"void f(a::Foo *f) {}", "void f(b::Bar *f) {}", "", ""}, {"a::Foo f() { return a::Foo(); }", "b::Bar f() { return b::Bar(); }", diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Rename/RenameEnumTest.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Rename/RenameEnumTest.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Rename/RenameEnumTest.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Rename/RenameEnumTest.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,189 @@ +#include "ClangRenameTest.h" + +namespace clang { +namespace clang_rename { +namespace test { +namespace { + +class RenameEnumTest : public ClangRenameTest { +public: + RenameEnumTest() { + AppendToHeader(R"( + #define MACRO(x) x + namespace a { + enum A1 { Red }; + enum class A2 { Blue }; + struct C { + enum NestedEnum { White }; + enum class NestedScopedEnum { Black }; + }; + namespace d { + enum A3 { Orange }; + } // namespace d + enum A4 { Pink }; + } // namespace a + enum A5 { Green };)"); + } +}; + +INSTANTIATE_TEST_CASE_P( + RenameEnumTests, RenameEnumTest, + testing::ValuesIn(std::vector({ + {"void f(a::A2 arg) { a::A2 t = a::A2::Blue; }", + "void f(b::B2 arg) { b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"}, + {"void f() { a::A1* t1; }", "void f() { b::B1* t1; }", "a::A1", + "b::B1"}, + {"void f() { a::A2* t1; }", "void f() { b::B2* t1; }", "a::A2", + "b::B2"}, + {"void f() { enum a::A2 t = a::A2::Blue; }", + "void f() { enum b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"}, + {"void f() { enum a::A2 t = a::A2::Blue; }", + "void f() { enum b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"}, + + {"void f() { a::A1 t = a::Red; }", "void f() { b::B1 t = b::B1::Red; }", + "a::A1", "b::B1"}, + {"void f() { a::A1 t = a::A1::Red; }", + 
"void f() { b::B1 t = b::B1::Red; }", "a::A1", "b::B1"}, + {"void f() { auto t = a::Red; }", "void f() { auto t = b::B1::Red; }", + "a::A1", "b::B1"}, + {"namespace b { void f() { a::A1 t = a::Red; } }", + "namespace b { void f() { B1 t = B1::Red; } }", "a::A1", "b::B1"}, + {"void f() { a::d::A3 t = a::d::Orange; }", + "void f() { a::b::B3 t = a::b::B3::Orange; }", "a::d::A3", "a::b::B3"}, + {"namespace a { void f() { a::d::A3 t = a::d::Orange; } }", + "namespace a { void f() { b::B3 t = b::B3::Orange; } }", "a::d::A3", + "a::b::B3"}, + {"void f() { A5 t = Green; }", "void f() { B5 t = Green; }", "A5", + "B5"}, + // FIXME: the new namespace qualifier should be added to the unscoped + // enum constant. + {"namespace a { void f() { auto t = Green; } }", + "namespace a { void f() { auto t = Green; } }", "a::A1", "b::B1"}, + + // namespace qualifiers + {"namespace a { void f(A1 a1) {} }", + "namespace a { void f(b::B1 a1) {} }", "a::A1", "b::B1"}, + {"namespace a { void f(A2 a2) {} }", + "namespace a { void f(b::B2 a2) {} }", "a::A2", "b::B2"}, + {"namespace b { void f(a::A1 a1) {} }", + "namespace b { void f(B1 a1) {} }", "a::A1", "b::B1"}, + {"namespace b { void f(a::A2 a2) {} }", + "namespace b { void f(B2 a2) {} }", "a::A2", "b::B2"}, + + // nested enums + {"void f() { a::C::NestedEnum t = a::C::White; }", + "void f() { a::C::NewNestedEnum t = a::C::NewNestedEnum::White; }", + "a::C::NestedEnum", "a::C::NewNestedEnum"}, + {"void f() { a::C::NestedScopedEnum t = a::C::NestedScopedEnum::Black; " + "}", + "void f() { a::C::NewNestedScopedEnum t = " + "a::C::NewNestedScopedEnum::Black; }", + "a::C::NestedScopedEnum", "a::C::NewNestedScopedEnum"}, + + // macros + {"void f(MACRO(a::A1) a1) {}", "void f(MACRO(b::B1) a1) {}", "a::A1", + "b::B1"}, + {"void f(MACRO(a::A2) a2) {}", "void f(MACRO(b::B2) a2) {}", "a::A2", + "b::B2"}, + {"#define FOO(T, t) T t\nvoid f() { FOO(a::A1, a1); }", + "#define FOO(T, t) T t\nvoid f() { FOO(b::B1, a1); }", "a::A1", + "b::B1"}, + 
{"#define FOO(T, t) T t\nvoid f() { FOO(a::A2, a2); }", + "#define FOO(T, t) T t\nvoid f() { FOO(b::B2, a2); }", "a::A2", + "b::B2"}, + {"#define FOO(n) a::A1 n\nvoid f() { FOO(a1); FOO(a2); }", + "#define FOO(n) b::B1 n\nvoid f() { FOO(a1); FOO(a2); }", "a::A1", + "b::B1"}, + + // using and type alias + {"using a::A1; A1 gA;", "using b::B1; b::B1 gA;", "a::A1", "b::B1"}, + {"using a::A2; A2 gA;", "using b::B2; b::B2 gA;", "a::A2", "b::B2"}, + {"struct S { using T = a::A1; T a_; };", + "struct S { using T = b::B1; T a_; };", "a::A1", "b::B1"}, + {"using T = a::A1; T gA;", "using T = b::B1; T gA;", "a::A1", "b::B1"}, + {"using T = a::A2; T gA;", "using T = b::B2; T gA;", "a::A2", "b::B2"}, + {"typedef a::A1 T; T gA;", "typedef b::B1 T; T gA;", "a::A1", "b::B1"}, + {"typedef a::A2 T; T gA;", "typedef b::B2 T; T gA;", "a::A2", "b::B2"}, + {"typedef MACRO(a::A1) T; T gA;", "typedef MACRO(b::B1) T; T gA;", + "a::A1", "b::B1"}, + + // templates + {"template struct Foo { T t; }; void f() { Foo " + "foo1; }", + "template struct Foo { T t; }; void f() { Foo " + "foo1; }", + "a::A1", "b::B1"}, + {"template struct Foo { T t; }; void f() { Foo " + "foo2; }", + "template struct Foo { T t; }; void f() { Foo " + "foo2; }", + "a::A2", "b::B2"}, + {"template struct Foo { a::A1 a1; };", + "template struct Foo { b::B1 a1; };", "a::A1", "b::B1"}, + {"template struct Foo { a::A2 a2; };", + "template struct Foo { b::B2 a2; };", "a::A2", "b::B2"}, + {"template int f() { return 1; } template<> int f() " + "{ return 2; } int g() { return f(); }", + "template int f() { return 1; } template<> int f() " + "{ return 2; } int g() { return f(); }", + "a::A1", "b::B1"}, + {"template int f() { return 1; } template<> int f() " + "{ return 2; } int g() { return f(); }", + "template int f() { return 1; } template<> int f() " + "{ return 2; } int g() { return f(); }", + "a::A2", "b::B2"}, + {"struct Foo { template T foo(); }; void g() { Foo f; " + "f.foo(); }", + "struct Foo { template T foo(); }; 
void g() { Foo f; " + "f.foo(); }", + "a::A1", "b::B1"}, + {"struct Foo { template T foo(); }; void g() { Foo f; " + "f.foo(); }", + "struct Foo { template T foo(); }; void g() { Foo f; " + "f.foo(); }", + "a::A2", "b::B2"}, + })), ); + +TEST_P(RenameEnumTest, RenameEnums) { + auto Param = GetParam(); + assert(!Param.OldName.empty()); + assert(!Param.NewName.empty()); + std::string Actual = + runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName); + CompareSnippets(Param.After, Actual); +} + +TEST_F(RenameEnumTest, RenameEnumDecl) { + std::string Before = R"( + namespace ns { + enum Old1 { Blue }; + } + )"; + std::string Expected = R"( + namespace ns { + enum New1 { Blue }; + } + )"; + std::string After = runClangRenameOnCode(Before, "ns::Old1", "ns::New1"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameEnumTest, RenameScopedEnumDecl) { + std::string Before = R"( + namespace ns { + enum class Old1 { Blue }; + } + )"; + std::string Expected = R"( + namespace ns { + enum class New1 { Blue }; + } + )"; + std::string After = runClangRenameOnCode(Before, "ns::Old1", "ns::New1"); + CompareSnippets(Expected, After); +} + +} // anonymous namespace +} // namespace test +} // namespace clang_rename +} // namesdpace clang diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Rename/RenameFunctionTest.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Rename/RenameFunctionTest.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Rename/RenameFunctionTest.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Rename/RenameFunctionTest.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -0,0 +1,555 @@ +//===-- RenameFunctionTest.cpp - unit tests for renaming functions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ClangRenameTest.h" + +namespace clang { +namespace clang_rename { +namespace test { +namespace { + +class RenameFunctionTest : public ClangRenameTest { +public: + RenameFunctionTest() { + AppendToHeader(R"( + struct A { + static bool Foo(); + static bool Spam(); + }; + struct B { + static void Same(); + static bool Foo(); + static int Eric(int x); + }; + void Same(int x); + int Eric(int x); + namespace base { + void Same(); + void ToNanoSeconds(); + void ToInt64NanoSeconds(); + })"); + } +}; + +TEST_F(RenameFunctionTest, RefactorsAFoo) { + std::string Before = R"( + void f() { + A::Foo(); + ::A::Foo(); + })"; + std::string Expected = R"( + void f() { + A::Bar(); + ::A::Bar(); + })"; + + std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, RefactorsNonCallingAFoo) { + std::string Before = R"( + bool g(bool (*func)()) { + return func(); + } + void f() { + auto *ref1 = A::Foo; + auto *ref2 = ::A::Foo; + g(A::Foo); + })"; + std::string Expected = R"( + bool g(bool (*func)()) { + return func(); + } + void f() { + auto *ref1 = A::Bar; + auto *ref2 = ::A::Bar; + g(A::Bar); + })"; + std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, RefactorsEric) { + std::string Before = R"( + void f() { + if (Eric(3)==4) ::Eric(2); + })"; + std::string Expected = R"( + void f() { + if (Larry(3)==4) ::Larry(2); + })"; + std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, RefactorsNonCallingEric) { + std::string Before = R"( + int g(int (*func)(int)) { + return func(1); + } + void f() { + auto *ref = ::Eric; + g(Eric); + })"; + std::string Expected = R"( + int g(int (*func)(int)) { + return func(1); + } + void f() { + auto *ref = ::Larry; 
+ g(Larry); + })"; + std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, DoesNotRefactorBFoo) { + std::string Before = R"( + void f() { + B::Foo(); + })"; + std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar"); + CompareSnippets(Before, After); +} + +TEST_F(RenameFunctionTest, DoesNotRefactorBEric) { + std::string Before = R"( + void f() { + B::Eric(2); + })"; + std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); + CompareSnippets(Before, After); +} + +TEST_F(RenameFunctionTest, DoesNotRefactorCEric) { + std::string Before = R"( + namespace C { int Eric(int x); } + void f() { + if (C::Eric(3)==4) ::C::Eric(2); + })"; + std::string Expected = R"( + namespace C { int Eric(int x); } + void f() { + if (C::Eric(3)==4) ::C::Eric(2); + })"; + std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, DoesNotRefactorEricInNamespaceC) { + std::string Before = R"( + namespace C { + int Eric(int x); + void f() { + if (Eric(3)==4) Eric(2); + } + } // namespace C)"; + std::string After = runClangRenameOnCode(Before, "Eric", "Larry"); + CompareSnippets(Before, After); +} + +TEST_F(RenameFunctionTest, NamespaceQualified) { + std::string Before = R"( + void f() { + base::ToNanoSeconds(); + ::base::ToNanoSeconds(); + } + void g() { + using base::ToNanoSeconds; + base::ToNanoSeconds(); + ::base::ToNanoSeconds(); + ToNanoSeconds(); + } + namespace foo { + namespace base { + void ToNanoSeconds(); + void f() { + base::ToNanoSeconds(); + } + } + void f() { + ::base::ToNanoSeconds(); + } + })"; + std::string Expected = R"( + void f() { + base::ToInt64NanoSeconds(); + ::base::ToInt64NanoSeconds(); + } + void g() { + using base::ToInt64NanoSeconds; + base::ToInt64NanoSeconds(); + ::base::ToInt64NanoSeconds(); + base::ToInt64NanoSeconds(); + } + namespace foo { + namespace base { + void 
ToNanoSeconds(); + void f() { + base::ToNanoSeconds(); + } + } + void f() { + ::base::ToInt64NanoSeconds(); + } + })"; + std::string After = runClangRenameOnCode(Before, "base::ToNanoSeconds", + "base::ToInt64NanoSeconds"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, RenameFunctionDecls) { + std::string Before = R"( + namespace na { + void X(); + void X() {} + })"; + std::string Expected = R"( + namespace na { + void Y(); + void Y() {} + })"; + std::string After = runClangRenameOnCode(Before, "na::X", "na::Y"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, RenameOutOfLineFunctionDecls) { + std::string Before = R"( + namespace na { + void X(); + } + void na::X() {} + )"; + std::string Expected = R"( + namespace na { + void Y(); + } + void na::Y() {} + )"; + std::string After = runClangRenameOnCode(Before, "na::X", "na::Y"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, NewNamespaceWithoutLeadingDotDot) { + std::string Before = R"( + namespace old_ns { + void X(); + void X() {} + } + // Assume that the reference is in another file. + void f() { old_ns::X(); } + namespace old_ns { void g() { X(); } } + namespace new_ns { void h() { ::old_ns::X(); } } + )"; + std::string Expected = R"( + namespace old_ns { + void Y(); + void Y() {} + } + // Assume that the reference is in another file. + void f() { new_ns::Y(); } + namespace old_ns { void g() { new_ns::Y(); } } + namespace new_ns { void h() { Y(); } } + )"; + std::string After = runClangRenameOnCode(Before, "::old_ns::X", "new_ns::Y"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, NewNamespaceWithLeadingDotDot) { + std::string Before = R"( + namespace old_ns { + void X(); + void X() {} + } + // Assume that the reference is in another file. 
+ void f() { old_ns::X(); } + namespace old_ns { void g() { X(); } } + namespace new_ns { void h() { ::old_ns::X(); } } + )"; + std::string Expected = R"( + namespace old_ns { + void Y(); + void Y() {} + } + // Assume that the reference is in another file. + void f() { ::new_ns::Y(); } + namespace old_ns { void g() { ::new_ns::Y(); } } + namespace new_ns { void h() { Y(); } } + )"; + std::string After = + runClangRenameOnCode(Before, "::old_ns::X", "::new_ns::Y"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, DontRenameSymbolsDefinedInAnonymousNamespace) { + std::string Before = R"( + namespace old_ns { + class X {}; + namespace { + void X(); + void X() {} + void f() { X(); } + } + } + )"; + std::string Expected = R"( + namespace old_ns { + class Y {}; + namespace { + void X(); + void X() {} + void f() { X(); } + } + } + )"; + std::string After = + runClangRenameOnCode(Before, "::old_ns::X", "::old_ns::Y"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, NewNestedNamespace) { + std::string Before = R"( + namespace old_ns { + void X(); + void X() {} + } + // Assume that the reference is in another file. + namespace old_ns { + void f() { X(); } + } + )"; + std::string Expected = R"( + namespace old_ns { + void X(); + void X() {} + } + // Assume that the reference is in another file. + namespace old_ns { + void f() { older_ns::X(); } + } + )"; + std::string After = + runClangRenameOnCode(Before, "::old_ns::X", "::old_ns::older_ns::X"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, MoveFromGlobalToNamespaceWithoutLeadingDotDot) { + std::string Before = R"( + void X(); + void X() {} + + // Assume that the reference is in another file. + namespace some_ns { + void f() { X(); } + } + )"; + std::string Expected = R"( + void X(); + void X() {} + + // Assume that the reference is in another file. 
+ namespace some_ns { + void f() { ns::X(); } + } + )"; + std::string After = + runClangRenameOnCode(Before, "::X", "ns::X"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, MoveFromGlobalToNamespaceWithLeadingDotDot) { + std::string Before = R"( + void Y() {} + + // Assume that the reference is in another file. + namespace some_ns { + void f() { Y(); } + } + )"; + std::string Expected = R"( + void Y() {} + + // Assume that the reference is in another file. + namespace some_ns { + void f() { ::ns::Y(); } + } + )"; + std::string After = + runClangRenameOnCode(Before, "::Y", "::ns::Y"); + CompareSnippets(Expected, After); +} + +// FIXME: the rename of overloaded operator is not fully supported yet. +TEST_F(RenameFunctionTest, DISABLED_DoNotRenameOverloadedOperatorCalls) { + std::string Before = R"( + namespace old_ns { + class T { public: int x; }; + bool operator==(const T& lhs, const T& rhs) { + return lhs.x == rhs.x; + } + } // namespace old_ns + + // Assume that the reference is in another file. + bool f() { + auto eq = old_ns::operator==; + old_ns::T t1, t2; + old_ns::operator==(t1, t2); + return t1 == t2; + } + )"; + std::string Expected = R"( + namespace old_ns { + class T { public: int x; }; + bool operator==(const T& lhs, const T& rhs) { + return lhs.x == rhs.x; + } + } // namespace old_ns + + // Assume that the reference is in another file. + bool f() { + auto eq = new_ns::operator==; + old_ns::T t1, t2; + new_ns::operator==(t1, t2); + return t1 == t2; + } + )"; + std::string After = + runClangRenameOnCode(Before, "old_ns::operator==", "new_ns::operator=="); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, FunctionRefAsTemplate) { + std::string Before = R"( + void X(); + + // Assume that the reference is in another file. 
+ namespace some_ns { + template + class TIterator {}; + + template + class T { + public: + typedef TIterator IterType; + using TI = TIterator; + void g() { + Func(); + auto func = Func; + TIterator iter; + } + }; + + + void f() { T tx; tx.g(); } + } // namespace some_ns + )"; + std::string Expected = R"( + void X(); + + // Assume that the reference is in another file. + namespace some_ns { + template + class TIterator {}; + + template + class T { + public: + typedef TIterator IterType; + using TI = TIterator; + void g() { + Func(); + auto func = Func; + TIterator iter; + } + }; + + + void f() { T tx; tx.g(); } + } // namespace some_ns + )"; + std::string After = runClangRenameOnCode(Before, "::X", "ns::X"); + CompareSnippets(Expected, After); +} + +TEST_F(RenameFunctionTest, RenameFunctionInUsingDecl) { + std::string Before = R"( + using base::ToNanoSeconds; + namespace old_ns { + using base::ToNanoSeconds; + void f() { + using base::ToNanoSeconds; + } + } + )"; + std::string Expected = R"( + using base::ToInt64NanoSeconds; + namespace old_ns { + using base::ToInt64NanoSeconds; + void f() { + using base::ToInt64NanoSeconds; + } + } + )"; + std::string After = runClangRenameOnCode(Before, "base::ToNanoSeconds", + "base::ToInt64NanoSeconds"); + CompareSnippets(Expected, After); +} + +// FIXME: Fix the complex the case where the symbol being renamed is located in +// `std::function>`. +TEST_F(ClangRenameTest, DISABLED_ReferencesInLambdaFunctionParameters) { + std::string Before = R"( + template + class function; + template + class function { + public: + template + function(Functor f) {} + + function() {} + + R operator()(ArgTypes...) const {} + }; + + namespace ns { + void Old() {} + void f() { + function func; + } + } // namespace ns)"; + std::string Expected = R"( + template + class function; + template + class function { + public: + template + function(Functor f) {} + + function() {} + + R operator()(ArgTypes...) 
const {} + }; + + namespace ns { + void New() {} + void f() { + function func; + } + } // namespace ns)"; + std::string After = runClangRenameOnCode(Before, "ns::Old", "::new_ns::New"); + CompareSnippets(Expected, After); +} + +} // anonymous namespace +} // namespace test +} // namespace clang_rename +} // namesdpace clang diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Tooling/RefactoringActionRulesTest.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Tooling/RefactoringActionRulesTest.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/unittests/Tooling/RefactoringActionRulesTest.cpp 2017-10-15 17:43:56.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang/unittests/Tooling/RefactoringActionRulesTest.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -11,6 +11,7 @@ #include "RewriterTestContext.h" #include "clang/Tooling/Refactoring.h" #include "clang/Tooling/Refactoring/RefactoringActionRules.h" +#include "clang/Tooling/Refactoring/RefactoringDiagnostic.h" #include "clang/Tooling/Refactoring/Rename/SymbolName.h" #include "clang/Tooling/Tooling.h" #include "llvm/Support/Errc.h" @@ -128,12 +129,12 @@ createReplacements(Rule, RefContext); ASSERT_TRUE(!ErrorOrResult); - std::string Message; - llvm::handleAllErrors( - ErrorOrResult.takeError(), - [&](llvm::StringError &Error) { Message = Error.getMessage(); }); - EXPECT_EQ(Message, - "refactoring action can't be initiated without a selection"); + unsigned DiagID; + llvm::handleAllErrors(ErrorOrResult.takeError(), + [&](DiagnosticError &Error) { + DiagID = Error.getDiagnostic().second.getDiagID(); + }); + EXPECT_EQ(DiagID, diag::err_refactor_no_selection); } } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang/utils/TableGen/ClangAttrEmitter.cpp llvm-toolchain-snapshot-6.0~svn316003/clang/utils/TableGen/ClangAttrEmitter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang/utils/TableGen/ClangAttrEmitter.cpp 2017-10-15 17:43:56.000000000 +0000 +++ 
llvm-toolchain-snapshot-6.0~svn316003/clang/utils/TableGen/ClangAttrEmitter.cpp 2017-10-17 14:41:56.000000000 +0000 @@ -1440,7 +1440,7 @@ assert(!SpellingList.empty() && "Attribute with empty spelling list can't have accessors!"); for (const auto *Accessor : Accessors) { - std::string Name = Accessor->getValueAsString("Name"); + const StringRef Name = Accessor->getValueAsString("Name"); std::vector Spellings = GetFlattenedSpellings(*Accessor); OS << " bool " << Name << "() const { return SpellingListIndex == "; @@ -1589,7 +1589,7 @@ // Abstract rules are used only for sub-rules bool isAbstractRule() const { return getSubjects().empty(); } - std::string getName() const { + StringRef getName() const { return (Constraint ? Constraint : MetaSubject)->getValueAsString("Name"); } @@ -1821,13 +1821,11 @@ // Generate a function that constructs a set of matching rules that describe // to which declarations the attribute should apply to. std::string FnName = "matchRulesFor" + Attr.getName().str(); - std::stringstream SS; - SS << "static void " << FnName << "(llvm::SmallVectorImpl> &MatchRules, const LangOptions &LangOpts) {\n"; if (Attr.isValueUnset("Subjects")) { - SS << "}\n\n"; - OS << SS.str(); + OS << "}\n\n"; return FnName; } const Record *SubjectObj = Attr.getValueAsDef("Subjects"); @@ -1840,24 +1838,23 @@ // The rule might be language specific, so only subtract it from the given // rules if the specific language options are specified. std::vector LangOpts = Rule.getLangOpts(); - SS << " MatchRules.push_back(std::make_pair(" << Rule.getEnumValue() + OS << " MatchRules.push_back(std::make_pair(" << Rule.getEnumValue() << ", /*IsSupported=*/"; if (!LangOpts.empty()) { for (auto I = LangOpts.begin(), E = LangOpts.end(); I != E; ++I) { - std::string Part = (*I)->getValueAsString("Name"); + const StringRef Part = (*I)->getValueAsString("Name"); if ((*I)->getValueAsBit("Negated")) - SS << "!"; - SS << "LangOpts." + Part; + OS << "!"; + OS << "LangOpts." 
<< Part; if (I + 1 != E) - SS << " || "; + OS << " || "; } } else - SS << "true"; - SS << "));\n"; + OS << "true"; + OS << "));\n"; } } - SS << "}\n\n"; - OS << SS.str(); + OS << "}\n\n"; return FnName; } @@ -1913,7 +1910,8 @@ continue; std::string SubRuleFunction; if (SubMatchRules.count(Rule.MetaSubject)) - SubRuleFunction = "isAttributeSubjectMatchSubRuleFor_" + Rule.getName(); + SubRuleFunction = + ("isAttributeSubjectMatchSubRuleFor_" + Rule.getName()).str(); else SubRuleFunction = "defaultIsAttributeSubjectMatchSubRuleFor"; OS << " Case(\"" << Rule.getName() << "\", std::make_pair(" @@ -2998,7 +2996,7 @@ return createArgument(R, AttrName)->isVariadic(); } -static void emitArgInfo(const Record &R, std::stringstream &OS) { +static void emitArgInfo(const Record &R, raw_ostream &OS) { // This function will count the number of arguments specified for the // attribute and emit the number of required arguments followed by the // number of optional arguments. @@ -3030,7 +3028,7 @@ static std::string CalculateDiagnostic(const Record &S) { // If the SubjectList object has a custom diagnostic associated with it, // return that directly. - std::string CustomDiag = S.getValueAsString("CustomDiag"); + const StringRef CustomDiag = S.getValueAsString("CustomDiag"); if (!CustomDiag.empty()) return CustomDiag; @@ -3314,12 +3312,13 @@ // codegen efficiency). std::string FnName = "check", Test; for (auto I = LangOpts.begin(), E = LangOpts.end(); I != E; ++I) { - std::string Part = (*I)->getValueAsString("Name"); + const StringRef Part = (*I)->getValueAsString("Name"); if ((*I)->getValueAsBit("Negated")) { FnName += "Not"; Test += "!"; } - Test += "S.LangOpts." + Part; + Test += "S.LangOpts."; + Test += Part; if (I + 1 != E) Test += " || "; FnName += Part; @@ -3375,7 +3374,7 @@ // applies to multiple target architectures. In order for the attribute to be // considered valid, all of its architectures need to be included. 
if (!Attr.isValueUnset("ParseKind")) { - std::string APK = Attr.getValueAsString("ParseKind"); + const StringRef APK = Attr.getValueAsString("ParseKind"); for (const auto &I : Dupes) { if (I.first == APK) { std::vector DA = @@ -3471,7 +3470,8 @@ // another mapping. At the same time, generate the AttrInfoMap object // contents. Due to the reliance on generated code, use separate streams so // that code will not be interleaved. - std::stringstream SS; + std::string Buffer; + raw_string_ostream SS {Buffer}; for (auto I = Attrs.begin(), E = Attrs.end(); I != E; ++I) { // TODO: If the attribute's kind appears in the list of duplicates, that is // because it is a target-specific attribute that appears multiple times. @@ -3663,20 +3663,25 @@ public: const Record *Documentation; const Record *Attribute; + std::string Heading; + unsigned SupportedSpellings; - DocumentationData(const Record &Documentation, const Record &Attribute) - : Documentation(&Documentation), Attribute(&Attribute) {} + DocumentationData(const Record &Documentation, const Record &Attribute, + const std::pair HeadingAndKinds) + : Documentation(&Documentation), Attribute(&Attribute), + Heading(std::move(HeadingAndKinds.first)), + SupportedSpellings(HeadingAndKinds.second) {} }; static void WriteCategoryHeader(const Record *DocCategory, raw_ostream &OS) { - const std::string &Name = DocCategory->getValueAsString("Name"); - OS << Name << "\n" << std::string(Name.length(), '=') << "\n"; + const StringRef Name = DocCategory->getValueAsString("Name"); + OS << Name << "\n" << std::string(Name.size(), '=') << "\n"; // If there is content, print that as well. - std::string ContentStr = DocCategory->getValueAsString("Content"); + const StringRef ContentStr = DocCategory->getValueAsString("Content"); // Trim leading and trailing newlines and spaces. 
- OS << StringRef(ContentStr).trim(); + OS << ContentStr.trim(); OS << "\n\n"; } @@ -3691,16 +3696,17 @@ Pragma = 1 << 6 }; -static void WriteDocumentation(RecordKeeper &Records, - const DocumentationData &Doc, raw_ostream &OS) { +static std::pair +GetAttributeHeadingAndSpellingKinds(const Record &Documentation, + const Record &Attribute) { // FIXME: there is no way to have a per-spelling category for the attribute // documentation. This may not be a limiting factor since the spellings // should generally be consistently applied across the category. - std::vector Spellings = GetFlattenedSpellings(*Doc.Attribute); + std::vector Spellings = GetFlattenedSpellings(Attribute); // Determine the heading to be used for this attribute. - std::string Heading = Doc.Documentation->getValueAsString("Heading"); + std::string Heading = Documentation.getValueAsString("Heading"); bool CustomHeading = !Heading.empty(); if (Heading.empty()) { // If there's only one spelling, we can simply use that. @@ -3722,7 +3728,7 @@ // If the heading is still empty, it is an error. if (Heading.empty()) - PrintFatalError(Doc.Attribute->getLoc(), + PrintFatalError(Attribute.getLoc(), "This attribute requires a heading to be specified"); // Gather a list of unique spellings; this is not the same as the semantic @@ -3765,29 +3771,33 @@ } Heading += ")"; } - OS << Heading << "\n" << std::string(Heading.length(), '-') << "\n"; - if (!SupportedSpellings) - PrintFatalError(Doc.Attribute->getLoc(), + PrintFatalError(Attribute.getLoc(), "Attribute has no supported spellings; cannot be " "documented"); + return std::make_pair(std::move(Heading), SupportedSpellings); +} + +static void WriteDocumentation(RecordKeeper &Records, + const DocumentationData &Doc, raw_ostream &OS) { + OS << Doc.Heading << "\n" << std::string(Doc.Heading.length(), '-') << "\n"; // List what spelling syntaxes the attribute supports. OS << ".. 
csv-table:: Supported Syntaxes\n"; OS << " :header: \"GNU\", \"C++11\", \"C2x\", \"__declspec\", \"Keyword\","; OS << " \"Pragma\", \"Pragma clang attribute\"\n\n"; OS << " \""; - if (SupportedSpellings & GNU) OS << "X"; + if (Doc.SupportedSpellings & GNU) OS << "X"; OS << "\",\""; - if (SupportedSpellings & CXX11) OS << "X"; + if (Doc.SupportedSpellings & CXX11) OS << "X"; OS << "\",\""; - if (SupportedSpellings & C2x) OS << "X"; + if (Doc.SupportedSpellings & C2x) OS << "X"; OS << "\",\""; - if (SupportedSpellings & Declspec) OS << "X"; + if (Doc.SupportedSpellings & Declspec) OS << "X"; OS << "\",\""; - if (SupportedSpellings & Keyword) OS << "X"; + if (Doc.SupportedSpellings & Keyword) OS << "X"; OS << "\", \""; - if (SupportedSpellings & Pragma) OS << "X"; + if (Doc.SupportedSpellings & Pragma) OS << "X"; OS << "\", \""; if (getPragmaAttributeSupport(Records).isAttributedSupported(*Doc.Attribute)) OS << "X"; @@ -3799,16 +3809,16 @@ OS << "This attribute has been deprecated, and may be removed in a future " << "version of Clang."; const Record &Deprecated = *Doc.Documentation->getValueAsDef("Deprecated"); - std::string Replacement = Deprecated.getValueAsString("Replacement"); + const StringRef Replacement = Deprecated.getValueAsString("Replacement"); if (!Replacement.empty()) OS << " This attribute has been superseded by ``" << Replacement << "``."; OS << "\n\n"; } - std::string ContentStr = Doc.Documentation->getValueAsString("Content"); + const StringRef ContentStr = Doc.Documentation->getValueAsString("Content"); // Trim leading and trailing newlines and spaces. - OS << StringRef(ContentStr).trim(); + OS << ContentStr.trim(); OS << "\n\n\n"; } @@ -3837,23 +3847,29 @@ // If the category is "undocumented", then there cannot be any other // documentation categories (otherwise, the attribute would become // documented). 
- std::string Cat = Category->getValueAsString("Name"); + const StringRef Cat = Category->getValueAsString("Name"); bool Undocumented = Cat == "Undocumented"; if (Undocumented && Docs.size() > 1) PrintFatalError(Doc.getLoc(), "Attribute is \"Undocumented\", but has multiple " - "documentation categories"); + "documentation categories"); if (!Undocumented) - SplitDocs[Category].push_back(DocumentationData(Doc, Attr)); + SplitDocs[Category].push_back(DocumentationData( + Doc, Attr, GetAttributeHeadingAndSpellingKinds(Doc, Attr))); } } // Having split the attributes out based on what documentation goes where, // we can begin to generate sections of documentation. - for (const auto &I : SplitDocs) { + for (auto &I : SplitDocs) { WriteCategoryHeader(I.first, OS); + std::sort(I.second.begin(), I.second.end(), + [](const DocumentationData &D1, const DocumentationData &D2) { + return D1.Heading < D2.Heading; + }); + // Walk over each of the attributes in the category and write out their // documentation. 
for (const auto &Doc : I.second) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang-tools-extra/change-namespace/ChangeNamespace.cpp llvm-toolchain-snapshot-6.0~svn316003/clang-tools-extra/change-namespace/ChangeNamespace.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang-tools-extra/change-namespace/ChangeNamespace.cpp 2017-10-15 17:44:13.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang-tools-extra/change-namespace/ChangeNamespace.cpp 2017-10-17 14:42:10.000000000 +0000 @@ -427,7 +427,8 @@ unless(templateSpecializationType())))))), hasParent(nestedNameSpecifierLoc()), hasAncestor(isImplicit()), - hasAncestor(UsingShadowDeclInClass))), + hasAncestor(UsingShadowDeclInClass), + hasAncestor(functionDecl(isDefaulted())))), hasAncestor(decl().bind("dc"))) .bind("type"), this); @@ -451,6 +452,7 @@ specifiesType(hasDeclaration(DeclMatcher.bind("from_decl"))))), unless(anyOf(hasAncestor(isImplicit()), hasAncestor(UsingShadowDeclInClass), + hasAncestor(functionDecl(isDefaulted())), hasAncestor(typeLoc(loc(qualType(hasDeclaration( decl(equalsBoundNode("from_decl")))))))))) .bind("nested_specifier_loc"), diff -Nru llvm-toolchain-snapshot-6.0~svn315865/clang-tools-extra/unittests/change-namespace/ChangeNamespaceTests.cpp llvm-toolchain-snapshot-6.0~svn316003/clang-tools-extra/unittests/change-namespace/ChangeNamespaceTests.cpp --- llvm-toolchain-snapshot-6.0~svn315865/clang-tools-extra/unittests/change-namespace/ChangeNamespaceTests.cpp 2017-10-15 17:44:13.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/clang-tools-extra/unittests/change-namespace/ChangeNamespaceTests.cpp 2017-10-17 14:42:10.000000000 +0000 @@ -2093,6 +2093,68 @@ EXPECT_EQ(format(Expected), runChangeNamespaceOnCode(Code)); } +TEST_F(ChangeNamespaceTest, DefaultMoveConstructors) { + std::string Code = "namespace na {\n" + "class B {\n" + " public:\n" + " B() = default;\n" + " // Allow move only.\n" + " B(B&&) = default;\n" + " B& operator=(B&&) = default;\n" + " B(const B&) = 
delete;\n" + " B& operator=(const B&) = delete;\n" + " private:\n" + " int ref_;\n" + "};\n" + "} // namespace na\n" + "namespace na {\n" + "namespace nb {\n" + "class A {\n" + "public:\n" + " A() = default;\n" + " A(A&&) = default;\n" + " A& operator=(A&&) = default;\n" + "private:\n" + " B b;\n" + " A(const A&) = delete;\n" + " A& operator=(const A&) = delete;\n" + "};\n" + "void f() { A a; a = A(); A aa = A(); }\n" + "} // namespace nb\n" + "} // namespace na\n"; + std::string Expected = "namespace na {\n" + "class B {\n" + " public:\n" + " B() = default;\n" + " // Allow move only.\n" + " B(B&&) = default;\n" + " B& operator=(B&&) = default;\n" + " B(const B&) = delete;\n" + " B& operator=(const B&) = delete;\n" + " private:\n" + " int ref_;\n" + "};\n" + "} // namespace na\n" + "\n" + "namespace x {\n" + "namespace y {\n" + "class A {\n" + "public:\n" + " A() = default;\n" + " A(A&&) = default;\n" + " A& operator=(A&&) = default;\n" + "private:\n" + " na::B b;\n" + " A(const A&) = delete;\n" + " A& operator=(const A&) = delete;\n" + "};\n" + "void f() { A a; a = A(); A aa = A(); }\n" + "} // namespace y\n" + "} // namespace x\n"; + EXPECT_EQ(format(Expected), runChangeNamespaceOnCode(Code)); +} + + } // anonymous namespace } // namespace change_namespace } // namespace clang diff -Nru llvm-toolchain-snapshot-6.0~svn315865/cmake/config-ix.cmake llvm-toolchain-snapshot-6.0~svn316003/cmake/config-ix.cmake --- llvm-toolchain-snapshot-6.0~svn315865/cmake/config-ix.cmake 2017-10-15 17:43:14.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/cmake/config-ix.cmake 2017-10-17 14:41:11.000000000 +0000 @@ -160,7 +160,7 @@ find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) set(LLVM_LIBXML2_ENABLED 0) set(LIBXML2_FOUND 0) - if((LLVM_ENABLE_LIBXML2) AND (CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH)) + if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE)) find_package(LibXml2) if 
(LIBXML2_FOUND) set(LLVM_LIBXML2_ENABLED 1) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/CMakeLists.txt llvm-toolchain-snapshot-6.0~svn316003/CMakeLists.txt --- llvm-toolchain-snapshot-6.0~svn315865/CMakeLists.txt 2017-10-15 17:43:14.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/CMakeLists.txt 2017-10-17 14:41:11.000000000 +0000 @@ -804,7 +804,7 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM # with libxml2, iconv.h, etc., we must add /usr/local paths. - include_directories("/usr/local/include") + include_directories(SYSTEM "/usr/local/include") link_directories("/usr/local/lib") endif(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") @@ -1012,3 +1012,4 @@ if (MSVC) include(InstallRequiredSystemLibraries) endif() + diff -Nru llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/lib/scudo/scudo_utils.cpp llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/lib/scudo/scudo_utils.cpp --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/lib/scudo/scudo_utils.cpp 2017-10-15 17:44:19.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/lib/scudo/scudo_utils.cpp 2017-10-17 14:42:15.000000000 +0000 @@ -13,8 +13,6 @@ #include "scudo_utils.h" -#include "sanitizer_common/sanitizer_posix.h" - #include #if defined(__x86_64__) || defined(__i386__) # include @@ -23,6 +21,8 @@ # if SANITIZER_ANDROID && __ANDROID_API__ < 18 // getauxval() was introduced with API level 18 on Android. Emulate it using // /proc/self/auxv for lower API levels. 
+# include "sanitizer_common/sanitizer_posix.h" + # include # define AT_HWCAP 16 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/lib/xray/xray_fdr_logging.cc llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/lib/xray/xray_fdr_logging.cc --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/lib/xray/xray_fdr_logging.cc 2017-10-15 17:44:19.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/lib/xray/xray_fdr_logging.cc 2017-10-17 14:42:15.000000000 +0000 @@ -206,17 +206,17 @@ void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { auto TSC_CPU = getTimestamp(); - __xray_fdr_internal::processFunctionHook( - FuncId, Entry, std::get<0>(TSC_CPU), std::get<1>(TSC_CPU), 0, - clock_gettime, *BQ); + __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), + std::get<1>(TSC_CPU), 0, + clock_gettime, *BQ); } void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg) XRAY_NEVER_INSTRUMENT { auto TSC_CPU = getTimestamp(); - __xray_fdr_internal::processFunctionHook( - FuncId, Entry, std::get<0>(TSC_CPU), std::get<1>(TSC_CPU), Arg, - clock_gettime, *BQ); + __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), + std::get<1>(TSC_CPU), Arg, + clock_gettime, *BQ); } void fdrLoggingHandleCustomEvent(void *Event, @@ -248,7 +248,7 @@ // - The metadata record we're going to write. (16 bytes) // - The additional data we're going to write. Currently, that's the size of // the event we're going to dump into the log as free-form bytes. 
- if (!prepareBuffer(clock_gettime, MetadataRecSize + EventSize)) { + if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) { TLD.LocalBQ = nullptr; return; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/lib/xray/xray_fdr_logging_impl.h llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/lib/xray/xray_fdr_logging_impl.h --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/lib/xray/xray_fdr_logging_impl.h 2017-10-15 17:44:19.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/lib/xray/xray_fdr_logging_impl.h 2017-10-17 14:42:15.000000000 +0000 @@ -532,7 +532,8 @@ return true; } -inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t, +inline bool prepareBuffer(uint64_t TSC, unsigned char CPU, + int (*wall_clock_reader)(clockid_t, struct timespec *), size_t MaxSize) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); @@ -549,6 +550,9 @@ return false; } setupNewBuffer(wall_clock_reader); + + // Always write the CPU metadata as the first record in the buffer. + writeNewCPUIdMetadata(CPU, TSC); } return true; } @@ -599,6 +603,9 @@ } setupNewBuffer(wall_clock_reader); + + // Always write the CPU metadata as the first record in the buffer. + writeNewCPUIdMetadata(CPU, TSC); } if (TLD.CurrentCPU == std::numeric_limits::max()) { @@ -728,7 +735,7 @@ // bytes in the end of the buffer, we need to write out the EOB, get a new // Buffer, set it up properly before doing any further writing. 
size_t MaxSize = FunctionRecSize + 2 * MetadataRecSize; - if (!prepareBuffer(wall_clock_reader, MaxSize)) { + if (!prepareBuffer(TSC, CPU, wall_clock_reader, MaxSize)) { TLD.LocalBQ = nullptr; return; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/cross-dso/lit.local.cfg llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/cross-dso/lit.local.cfg --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/cross-dso/lit.local.cfg 2017-10-15 17:44:19.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/cross-dso/lit.local.cfg 2017-10-17 14:42:15.000000000 +0000 @@ -5,5 +5,9 @@ root = getRoot(config) -if root.host_os not in ['Linux'] or config.android: +if root.host_os not in ['Linux']: + config.unsupported = True + +# Android O (API level 26) has support for cross-dso cfi in libdl.so. +if config.android and 'android-26' not in config.available_features: config.unsupported = True diff -Nru llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/cross-dso/shadow_is_read_only.cpp llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/cross-dso/shadow_is_read_only.cpp --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/cross-dso/shadow_is_read_only.cpp 2017-10-15 17:44:19.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/cross-dso/shadow_is_read_only.cpp 2017-10-17 14:42:15.000000000 +0000 @@ -12,6 +12,9 @@ // Tests that shadow is read-only most of the time. // REQUIRES: cxxabi +// Uses private API that is not available on Android. 
+// UNSUPPORTED: android + #include #include #include diff -Nru llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/cross-dso/stats.cpp llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/cross-dso/stats.cpp --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/cross-dso/stats.cpp 2017-10-15 17:44:19.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/cross-dso/stats.cpp 2017-10-17 14:42:15.000000000 +0000 @@ -6,6 +6,9 @@ // CFI-icall is not implemented in thinlto mode => ".cfi" suffixes are missing // in sanstats output. +// FIXME: %t.stats must be transferred from device to host for this to work on Android. +// XFAIL: android + struct ABase {}; struct A : ABase { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/cross-dso/util/cfi_stubs.h llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/cross-dso/util/cfi_stubs.h --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/cross-dso/util/cfi_stubs.h 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/cross-dso/util/cfi_stubs.h 2017-10-17 14:42:15.000000000 +0000 @@ -0,0 +1,30 @@ +// This is a hack to access CFI interface that Android has in libdl.so on +// device, but not in the NDK. 
+#include +#include +#include + +typedef void (*cfi_slowpath_ty)(uint64_t, void *); +typedef void (*cfi_slowpath_diag_ty)(uint64_t, void *, void *); + +static cfi_slowpath_ty cfi_slowpath; +static cfi_slowpath_diag_ty cfi_slowpath_diag; + +__attribute__((constructor(0), no_sanitize("cfi"))) static void init() { + cfi_slowpath = (cfi_slowpath_ty)dlsym(RTLD_NEXT, "__cfi_slowpath"); + cfi_slowpath_diag = + (cfi_slowpath_diag_ty)dlsym(RTLD_NEXT, "__cfi_slowpath_diag"); + if (!cfi_slowpath || !cfi_slowpath_diag) abort(); +} + +extern "C" { +__attribute__((visibility("hidden"), no_sanitize("cfi"))) void __cfi_slowpath( + uint64_t Type, void *Addr) { + cfi_slowpath(Type, Addr); +} + +__attribute__((visibility("hidden"), no_sanitize("cfi"))) void +__cfi_slowpath_diag(uint64_t Type, void *Addr, void *Diag) { + cfi_slowpath_diag(Type, Addr, Diag); +} +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/lit.cfg llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/lit.cfg --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/cfi/lit.cfg 2017-10-15 17:44:19.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/cfi/lit.cfg 2017-10-17 14:42:15.000000000 +0000 @@ -24,6 +24,8 @@ diag = '-fno-sanitize-trap=cfi -fsanitize-recover=cfi ' non_dso = '-fvisibility=hidden ' dso = '-fsanitize-cfi-cross-dso -fvisibility=default ' + if config.android: + dso += '-include ' + config.test_source_root + '/cross-dso/util/cfi_stubs.h ' config.substitutions.append((r"%clang_cfi ", clang_cfi + non_dso)) config.substitutions.append((r"%clangxx_cfi ", clang_cfi + cxx + non_dso)) config.substitutions.append((r"%clang_cfi_diag ", clang_cfi + non_dso + diag)) @@ -33,5 +35,8 @@ else: config.unsupported = True +if config.default_sanitizer_opts: + config.environment['UBSAN_OPTIONS'] = ':'.join(config.default_sanitizer_opts) + if lit_config.params.get('check_supported', None) and config.unsupported: raise BaseException("Tests unsupported") diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/lit.common.cfg llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/lit.common.cfg --- llvm-toolchain-snapshot-6.0~svn315865/compiler-rt/test/lit.common.cfg 2017-10-15 17:44:19.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/compiler-rt/test/lit.common.cfg 2017-10-17 14:42:15.000000000 +0000 @@ -201,6 +201,19 @@ else: config.substitutions.append( ("%macos_min_target_10_11", "") ) +if config.android: + adb = os.environ.get('ADB', 'adb') + try: + android_api_level_str = subprocess.check_output([adb, "shell", "getprop", "ro.build.version.sdk"]).rstrip() + except (subprocess.CalledProcessError, OSError): + lit_config.fatal("Failed to read ro.build.version.sdk (using '%s' as adb)" % adb) + try: + android_api_level = int(android_api_level_str) + except ValueError: + lit_config.fatal("Failed to read ro.build.version.sdk (using '%s' as adb): got '%s'" % (adb, android_api_level_str)) + if android_api_level >= 26: + config.available_features.add('android-26') + sancovcc_path = os.path.join(config.llvm_tools_dir, "sancov") if os.path.exists(sancovcc_path): config.available_features.add("has_sancovcc") diff -Nru llvm-toolchain-snapshot-6.0~svn315865/debian/changelog llvm-toolchain-snapshot-6.0~svn316003/debian/changelog --- llvm-toolchain-snapshot-6.0~svn315865/debian/changelog 2017-10-15 17:48:42.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/debian/changelog 2017-10-17 15:56:40.000000000 +0000 @@ -1,14 +1,291 @@ -llvm-toolchain-snapshot (1:6.0~svn315865-0~z~padoka0) zesty; urgency=medium +llvm-toolchain-snapshot (1:6.0~svn316003-0~z~padoka0) zesty; urgency=medium * New snapshot release - -- Paulo Dias Sun, 15 Oct 2017 15:48:42 -0200 + -- Paulo Dias Tue, 17 Oct 2017 13:56:40 -0200 -llvm-toolchain-snapshot (1:4.0~svn282919-1~exp1) experimental; urgency=medium +llvm-toolchain-snapshot (1:6.0~svn316003-1~exp1) experimental; urgency=medium * New snapshot release - -- Sylvestre Ledru Fri, 30 Sep 2016 
17:17:48 -0300 + -- Sylvestre Ledru Tue, 17 Oct 2017 12:42:38 -0200 + +llvm-toolchain-snapshot (1:6.0~svn315736-2) unstable; urgency=medium + + * Also ship ld64.lld in the lld-X.Y package + * Update d/rules to reflect the move of libfuzzer into compiler-rt + + -- Sylvestre Ledru Mon, 16 Oct 2017 10:24:33 +0200 + +llvm-toolchain-snapshot (1:6.0~svn315736-1) unstable; urgency=medium + + * New snapshot + * Ship liblldMinGW lld lib + * Ship clang-refactor & clang-func-mapping in clang-X.Y + * Remove the -Wl option to call gold instead of the normal linker + (Closes: #876787) + * Force the deactivation of ocaml until the transition is done + * Standards Version 4.1.0 + + [ Gianfranco Costamagna ] + * Enable ocaml on release architectures. + * Add NDEBUG flag, lost in the -g -> -g1 switch + + [ Matthias Klose ] + * Link with --no-keep-files-mapped --no-map-whole-files when using gold. + * build using gold on arm64 and s390x. For backports, arm64 might still + need the BFD linker, and building with only one or two processes in + parallel. + * On amd64, s390x, arm64 and ppc64el, build with -g1 instead of -g. + * Set CMAKE_CXX_FLAGS_RELWITHDEBINFO and pass opt_flags. 
+ + -- Sylvestre Ledru Mon, 11 Sep 2017 22:27:20 +0200 + + -- Sylvestre Ledru Tue, 12 Sep 2017 16:23:10 -0300 + +llvm-toolchain-snapshot (1:6.0~svn311834-2) unstable; urgency=medium + + * Fix the FTBFS because of -gsplit-dwarf: + - Only enable it on archs which needs it + - Only enable it when gcc supports it correctly + * Fail the build if the arch + gcc has a broken gsplit-dwarf support + + -- Sylvestre Ledru Thu, 31 Aug 2017 19:14:53 +0200 + +llvm-toolchain-snapshot (1:6.0~svn311834-1) unstable; urgency=medium + + * Link LLDB with -latomic on powerpcspe (Closes: #872267) + * Fix the C++ include path order (Closes: #859083) + * Disable -gsplit-dwarf when using gcc 7 for causing a linking issue + See https://bugs.llvm.org/show_bug.cgi?id=34140 + (Closes: #853525) + * clang was producing unusable binaries on armv5tel (Closes: #873307) + Thanks to Adrian Bunk for the patch + * With Ubuntu Trusty (for apt.llvm.org), the build fails + on internal compiler error: in output_index_string, at dwarf2out.c:218 + force the usage of gcc 4.9 + + -- Sylvestre Ledru Sat, 26 Aug 2017 22:35:00 +0200 + +llvm-toolchain-snapshot (1:6.0~svn310776-1) unstable; urgency=medium + + * We moved from 5.0 to 6.0 + * Ship the opt-viewer new program as part of llvm-6.0 tools + * ld.lld manpage wasn't installed + * Disable the clang-fix-cmpxchg8-detection-on-i386.patch patch + because breaks the build with + 'error: 'isCmpXChg8Supported' was not declared in this scope' + * Remove usr/bin/liblldb-intel-mpxtable.so-6.0 as it seems to be removed + from usptream + * Force the usage of gcc 6 until the link issues with gcc 7 are fixed + https://bugs.llvm.org/show_bug.cgi?id=34140 + + [ Katsuhiko Nishimra ] + * Ensure /usr/bin/g++-$(GCC_VERSION) exists (Closes: #871591) + + -- Sylvestre Ledru Thu, 27 Jul 2017 23:16:06 +0200 + +llvm-toolchain-snapshot (1:5.0~svn305653-1) unstable; urgency=medium + + [ Gianfranco Costamagna ] + * Re-add clang-doc documentation + + [ Sylvestre Ledru ] + * New snapshot 
release + * Fix a hurd PATH_MAX issue + * Transform the lldb swig check from a error to a warning + * Add libomp-dev to the suggests of clang + * Add Provides on python-lldb-x.y & python-clang-x.y & libllvm-x.y-ocaml-dev + to avoid the recurring problem about conflicts + (Closes: #835546, #863739, #863742) + * Standards-Version => 4.0.0 + * Generate the llvm-tblgen, clang-change-namespace, clang-offload-bundler + lld, clang++, clang-check, clang-cpp & clang-import-test manpages + * Remove the --no-discard-stderr option from help2man calls + * use -DPOLLY_BUNDLED_JSONCPP=OFF & add pkg-config as a dep (to help find + the files) + + -- Sylvestre Ledru Fri, 09 Jun 2017 12:04:56 +0200 + +llvm-toolchain-snapshot (1:5.0~svn302368-1~exp1) experimental; urgency=medium + + * Only enable libfuzzer for Linux kernel. + Thanks to Pino Toscano for the patch + * Add option -DPOLLY_BUNDLED_JSONCPP=ON + to use the system lib instead of the patch d/p/use-deb-json.diff + * New symbols added in libclang + - clang_EvalResult_getAsLongLong + - clang_EvalResult_getAsUnsigned + - clang_EvalResult_isUnsignedInt + - clang_TargetInfo_dispose + - clang_TargetInfo_getPointerWidth + - clang_TargetInfo_getTriple + - clang_Type_isTransparentTagTypedef + - clang_getAllSkippedRanges + - clang_getTranslationUnitTargetInfo + + -- Sylvestre Ledru Sun, 07 May 2017 12:13:43 +0200 + +llvm-toolchain-snapshot (1:5.0~svn298899-1) unstable; urgency=medium + + * Limit the archs where the ocaml binding is built + Should fix the FTBFS + Currently amd64 arm64 armel armhf i386 + * d/p/add_symbols_versioning.patch removed (applied upstream) + * Really fix "use versioned symbols" for llvm + Thanks to Julien Cristau for the patch (Closes: #849098) + * Explicit the dep of clang-tidy on same version of llvm to avoid + undefined symbols + * Add override_dh_makeshlibs for the libllvm or liblldb versions + Thanks to Julien Cristau for the patch + * change the min version of the libclang1 symbols to 1:4.0-3~ + * Fix the 
symlink on scan-build-py + * add libncurses in the list of build deps (Closes: #861170) + + [ Rebecca N. Palmer ] + * Use versioned symbols (Closes: #848368) + + -- Sylvestre Ledru Sun, 09 Apr 2017 10:11:56 +0200 + +llvm-toolchain-snapshot (1:5.0~svn294583-1~exp1) experimental; urgency=medium + + * New snapshot release + * New library liblldb-intel-mpxtable.so + * Fix the incorrect symlink to scan-build-py (Closes: #856869) + * Explicit the dep of clang-format on same version of llvm to avoid + undefined symbols + + -- Sylvestre Ledru Mon, 16 Jan 2017 09:03:48 +0100 + +llvm-toolchain-snapshot (1:5.0~svn292017-1~exp1) experimental; urgency=medium + + * New snapshot release + snapshot is now 5.0 + * d/p/silent-amdgpu-test-failing.diff silent amdgpu tests failing + (see upstream bug 31610) + * d/p/lldb-server-link-issue.patch removed, merged upstream + * Also install python-lldb-5.0 when installing lldb-5.0 (Closes: #851171) + * Bring back the content of llvm-5.0-doc (Closes: #844616) + * Bring back the content of llvm-4.0-doc (Closes: #844616) + * d/p/pthread-link.diff Hardcode like to pthread which was missing for + libclang + + -- Sylvestre Ledru Sat, 14 Jan 2017 16:36:51 +0100 + +llvm-toolchain-snapshot (1:4.0~svn291344-1) unstable; urgency=medium + + * New snapshot release + * Disable libedit usage in lldb because of garbage (Closes: #846616, #850111) + * Build lld + - d/p/lld-arg-cmake-issue.diff fixes upstream bug #27685 + * ship clang-tblgen + * Run clang extra test suite + * Fix the detection of lldb-server + * Fix the run of the check-lldb target + + -- Sylvestre Ledru Sat, 07 Jan 2017 12:24:32 +0100 + +llvm-toolchain-snapshot (1:4.0~svn290810-1) unstable; urgency=medium + + * New snapshot release + * d/p/kfreebsd-support.diff removed (applied upstream) + * debian/orig-tar.sh: less verbose + * d/p/lldb-missing-install.diff: For the install + of lldb-server and lldb-argdumper as they are not always installed + * Ship new binary in clang-X.Y: clang-import-test 
+ * New symbols in libclang1: + - clang_EvalResult_getAsLongLong + - clang_EvalResult_getAsLongLong + - clang_EvalResult_isUnsignedInt + * Fix a regression in the test run for the code coverage + * Silent ThinLTO/X86/autoupgrade.ll, fails with code coverage (and maybe others) + + -- Sylvestre Ledru Mon, 02 Jan 2017 13:51:06 +0100 + +llvm-toolchain-snapshot (1:4.0~svn286225-1) unstable; urgency=medium + + * New snapshot release + * Remove the info section from the generated manpages (Closes: #846269) + + [ Kai Wasserbäch ] + * debian/patches/{0003-Debian-version-info-and-bugreport.patch, + 0044-soname.diff,23-strlcpy_strlcat_warning_removed.diff, + 26-set-correct-float-abi.diff,atomic_library_[12].diff, + fix-clang-path-and-build.diff,fix-lldb-server-build,lldb-libname.diff, + lldb-soname.diff,mips-fpxx-enable.diff,removeduplicatedeclaration.diff}: + Refreshed. + * debian/patches/{silent-gold-utils,kfreebsd-support}.diff: Updated. + + -- Sylvestre Ledru Tue, 08 Nov 2016 12:19:55 +0100 + +llvm-toolchain-snapshot (1:4.0~svn282142-1~exp1) experimental; urgency=medium + + * The libstdc++-6-dev & libobjc-6-dev are only install with clang-X.Y + and libclang-X.Y-dev and no longer with libclang1-X.Y + (Closes: #841309) + * Fix the VCS-* fields + + [ Kai Wasserbäch ] + * debian/patches/{23-strlcpy_strlcat_warning_removed.diff, + 0003-Debian-version-info-and-bugreport.patch, atomic_library_[12].diff, + python-clangpath.diff,removeduplicatedeclaration.diff, + fix-clang-path-and-build.diff,mips-fpxx-enable.diff}: Refreshed. + * debian/patches/{silent-more-tests.diff,silent-MCJIIT-tests.diff}: Updated. + + -- Sylvestre Ledru Mon, 24 Oct 2016 10:45:07 +0200 + +llvm-toolchain-3.9 (1:3.9-6) unstable; urgency=medium + + * Fix segfaults in the memory sanitizers (Closes: #842642) + Caused by the newer glibc. 
Many thanks for Nobert Lange for everything + * Enable the sanitizers testsuite + + -- Sylvestre Ledru Fri, 11 Nov 2016 17:01:38 +0100 + +llvm-toolchain-3.9 (1:3.9-5) unstable; urgency=medium + + * d/p/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch: Also apply bug 29163 + to fix some issues in rust (Closes: #842956) + Many thanks to Ximin Luo for the investigation + + * libclang-common-4.0-dev: missing multilib binaries for the sanitizer + libraries (Closes: #841923) + Many thanks to Norbert Lange for the changes + + [ Pauli ] + * d/p/clang-fix-cmpxchg8-detection-on-i386.patch: + libcxx atomic tests for old i386 fail with wrong Atomic inline width. + Needed for libc++ + (See https://llvm.org/bugs/show_bug.cgi?id=19355) + * d/p lldb-addversion-suffix-to-llvm-server-exec.patch: + Fix the lldb-server call in some cases + + -- Sylvestre Ledru Fri, 04 Nov 2016 17:18:07 +0100 + +llvm-toolchain-3.9 (1:3.9-4) unstable; urgency=medium + + * LLVMConfig.cmake was installed into wrong location + Install a symlink from lib/cmake/llvm to share/llvm/cmake + (Closes: #839234) + * Fix a path issue in scan-view. 
Thanks Riccardo Magliocchetti + (Closes: #838572) + + -- Sylvestre Ledru Mon, 31 Oct 2016 10:47:52 +0100 + +llvm-toolchain-3.9 (1:3.9-3) unstable; urgency=medium + + [ Sylvestre Ledru ] + * The libstdc++-6-dev & libobjc-6-dev are only install with clang-X.Y + and libclang-X.Y-dev and no longer with libclang1-X.Y + (Closes: #841309) + + [ Gianfranco Costamagna ] + * Team upload + * d/p/drop-wrong-hack-arm64.patch: + - drop hack that was preventing the package from building on + non-amd64 64bit architectures: + + -- Gianfranco Costamagna Thu, 27 Oct 2016 11:45:28 +0200 llvm-toolchain-snapshot (1:4.0~svn280796-1~exp1) experimental; urgency=medium diff -Nru llvm-toolchain-snapshot-6.0~svn315865/debian/lld-X.Y.install.in llvm-toolchain-snapshot-6.0~svn316003/debian/lld-X.Y.install.in --- llvm-toolchain-snapshot-6.0~svn315865/debian/lld-X.Y.install.in 2017-08-08 01:08:13.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/debian/lld-X.Y.install.in 2017-10-17 14:39:33.000000000 +0000 @@ -1,7 +1,9 @@ usr/lib/llvm-@LLVM_VERSION@/bin/ld.lld* +usr/lib/llvm-@LLVM_VERSION@/bin/ld64.lld usr/lib/llvm-@LLVM_VERSION@/bin/lld-* usr/lib/llvm-@LLVM_VERSION@/bin/lld usr/bin/lld-link-@LLVM_VERSION@ usr/bin/ld.lld-@LLVM_VERSION@ +usr/bin/ld64.lld-@LLVM_VERSION@ usr/bin/lld-@LLVM_VERSION@* diff -Nru llvm-toolchain-snapshot-6.0~svn315865/debian/rules llvm-toolchain-snapshot-6.0~svn316003/debian/rules --- llvm-toolchain-snapshot-6.0~svn315865/debian/rules 2017-10-15 17:42:27.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/debian/rules 2017-10-17 14:39:33.000000000 +0000 @@ -302,7 +302,7 @@ CFLAGS=`dpkg-buildflags --get CFLAGS`; \ CFLAGS="$$CFLAGS `dpkg-buildflags --get CPPFLAGS`"; \ echo $$CFLAGS; \ - bin/clang++ -c $$CFLAGS -std=c++11 ../lib/Fuzzer/*.cpp -IFuzzer; \ + bin/clang++ -c $$CFLAGS -std=c++11 ../compiler-rt/lib/fuzzer/*.cpp -IFuzzer; \ ar ruv libFuzzer.a Fuzzer*.o endif diff -Nru llvm-toolchain-snapshot-6.0~svn315865/docs/AMDGPUUsage.rst 
llvm-toolchain-snapshot-6.0~svn316003/docs/AMDGPUUsage.rst --- llvm-toolchain-snapshot-6.0~svn315865/docs/AMDGPUUsage.rst 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/docs/AMDGPUUsage.rst 2017-10-17 14:41:11.000000000 +0000 @@ -684,7 +684,12 @@ **S** Represents the value of the symbol whose index resides in the relocation - entry. + entry. Relocations not using this must specify a symbol index of ``STN_UNDEF``. + +**B** + Represents the base address of a loaded executable or shared object which is + the difference between the ELF address and the actual load address. Relocations + using this are only valid in executable or shared objects. The following relocation types are supported: @@ -706,6 +711,8 @@ ``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32 ``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF ``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32 + *reserved* 12 + ``R_AMDGPU_RELATIVE64`` 13 ``word64`` B + A ========================== ===== ========== ============================== .. _amdgpu-dwarf: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/docs/LangRef.rst llvm-toolchain-snapshot-6.0~svn316003/docs/LangRef.rst --- llvm-toolchain-snapshot-6.0~svn315865/docs/LangRef.rst 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/docs/LangRef.rst 2017-10-17 14:41:11.000000000 +0000 @@ -4878,6 +4878,23 @@ !0 = !{ i64 0, i64 256 } !1 = !{ i64 -1, i64 -1 } +'``callees``' Metadata +^^^^^^^^^^^^^^^^^^^^^^ + +``callees`` metadata may be attached to indirect call sites. If ``callees`` +metadata is attached to a call site, and any callee is not among the set of +functions provided by the metadata, the behavior is undefined. The intent of +this metadata is to facilitate optimizations such as indirect-call promotion. +For example, in the code below, the call instruction may only target the +``add`` or ``sub`` functions: + +.. 
code-block:: llvm + + %result = call i64 %binop(i64 %x, i64 %y), !callees !0 + + ... + !0 = !{i64 (i64, i64)* @add, i64 (i64, i64)* @sub} + '``unpredictable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Analysis/ScalarEvolution.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Analysis/ScalarEvolution.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Analysis/ScalarEvolution.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Analysis/ScalarEvolution.h 2017-10-17 14:41:11.000000000 +0000 @@ -29,7 +29,6 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -1263,10 +1262,6 @@ /// Invalidate this result and free associated memory. void clear(); - - /// Insert all loops referred to by this BackedgeTakenCount into \p Result. - void findUsedLoops(ScalarEvolution &SE, - SmallPtrSetImpl &Result) const; }; /// Cache the backedge-taken count of the loops for this function as they @@ -1746,9 +1741,10 @@ const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride, bool Equality); - // Compute the maximum backedge count based on the range of values - // permitted by Start, End, and Stride. - const SCEV *computeMaxBECount(const SCEV *Start, const SCEV *Stride, + /// Compute the maximum backedge count based on the range of values + /// permitted by Start, End, and Stride. This is for loops of the form + /// {Start, +, Stride} LT End. + const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride, const SCEV *End, unsigned BitWidth, bool IsSigned); @@ -1775,20 +1771,14 @@ /// Find all of the loops transitively used in \p S, and update \c LoopUsers /// accordingly. 
void addToLoopUseLists(const SCEV *S); - void addToLoopUseLists(const BackedgeTakenInfo &BTI, const Loop *L); FoldingSet UniqueSCEVs; FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; - /// This maps loops to a list of entities that (transitively) use said loop. - /// A SCEV expression in the vector corresponding to a loop denotes that the - /// SCEV expression transitively uses said loop. A loop (LA) in the vector - /// corresponding to another loop (LB) denotes that LB is used in one of the - /// cached trip counts for LA. - DenseMap, 4>> - LoopUsers; + /// This maps loops to a list of SCEV expressions that (transitively) use said + /// loop. + DenseMap> LoopUsers; /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression /// they can be rewritten into under certain predicates. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Analysis/SparsePropagation.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Analysis/SparsePropagation.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Analysis/SparsePropagation.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Analysis/SparsePropagation.h 2017-10-17 14:41:11.000000000 +0000 @@ -23,16 +23,27 @@ namespace llvm { -template class SparseSolver; +/// A template for translating between LLVM Values and LatticeKeys. Clients must +/// provide a specialization of LatticeKeyInfo for their LatticeKey type. +template struct LatticeKeyInfo { + // static inline Value *getValueFromLatticeKey(LatticeKey Key); + // static inline LatticeKey getLatticeKeyFromValue(Value *V); +}; + +template > +class SparseSolver; /// AbstractLatticeFunction - This class is implemented by the dataflow instance /// to specify what the lattice values are and how they handle merges etc. This /// gives the client the power to compute lattice values from instructions, /// constants, etc. The current requirement is that lattice values must be -/// copyable. 
At the moment, nothing tries to avoid copying. - - -template class AbstractLatticeFunction { +/// copyable. At the moment, nothing tries to avoid copying. Additionally, +/// lattice keys must be able to be used as keys of a mapping data structure. +/// Internally, the generic solver currently uses a DenseMap to map lattice keys +/// to lattice values. If the lattice key is a non-standard type, a +/// specialization of DenseMapInfo must be provided. +template class AbstractLatticeFunction { private: LatticeVal UndefVal, OverdefinedVal, UntrackedVal; @@ -50,35 +61,21 @@ LatticeVal getOverdefinedVal() const { return OverdefinedVal; } LatticeVal getUntrackedVal() const { return UntrackedVal; } - /// IsUntrackedValue - If the specified Value is something that is obviously - /// uninteresting to the analysis (and would always return UntrackedVal), - /// this function can return true to avoid pointless work. - virtual bool IsUntrackedValue(Value *V) { return false; } - - /// ComputeConstant - Given a constant value, compute and return a lattice - /// value corresponding to the specified constant. - virtual LatticeVal ComputeConstant(Constant *C) { - return getOverdefinedVal(); // always safe + /// IsUntrackedValue - If the specified LatticeKey is obviously uninteresting + /// to the analysis (i.e., it would always return UntrackedVal), this + /// function can return true to avoid pointless work. + virtual bool IsUntrackedValue(LatticeKey Key) { return false; } + + /// ComputeLatticeVal - Compute and return a LatticeVal corresponding to the + /// given LatticeKey. + virtual LatticeVal ComputeLatticeVal(LatticeKey Key) { + return getOverdefinedVal(); } /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is /// one that the we want to handle through ComputeInstructionState. virtual bool IsSpecialCasedPHI(PHINode *PN) { return false; } - /// GetConstant - If the specified lattice value is representable as an LLVM - /// constant value, return it. 
Otherwise return null. The returned value - /// must be in the same LLVM type as Val. - virtual Constant *GetConstant(LatticeVal LV, Value *Val, - SparseSolver &SS) { - return nullptr; - } - - /// ComputeArgument - Given a formal argument value, compute and return a - /// lattice value corresponding to the specified argument. - virtual LatticeVal ComputeArgument(Argument *I) { - return getOverdefinedVal(); // always safe - } - /// MergeValues - Compute and return the merge of the two specified lattice /// values. Merging should only move one direction down the lattice to /// guarantee convergence (toward overdefined). @@ -86,27 +83,40 @@ return getOverdefinedVal(); // always safe, never useful. } - /// ComputeInstructionState - Given an instruction and a vector of its operand - /// values, compute the result value of the instruction. - virtual LatticeVal ComputeInstructionState(Instruction &I, - SparseSolver &SS) { - return getOverdefinedVal(); // always safe, never useful. + /// ComputeInstructionState - Compute the LatticeKeys that change as a result + /// of executing instruction \p I. Their associated LatticeVals are store in + /// \p ChangedValues. + virtual void + ComputeInstructionState(Instruction &I, + DenseMap &ChangedValues, + SparseSolver &SS) = 0; + + /// PrintLatticeVal - Render the given LatticeVal to the specified stream. + virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS); + + /// PrintLatticeKey - Render the given LatticeKey to the specified stream. + virtual void PrintLatticeKey(LatticeKey Key, raw_ostream &OS); + + /// GetValueFromLatticeVal - If the given LatticeVal is representable as an + /// LLVM value, return it; otherwise, return nullptr. If a type is given, the + /// returned value must have the same type. This function is used by the + /// generic solver in attempting to resolve branch and switch conditions. 
+ virtual Value *GetValueFromLatticeVal(LatticeVal LV, Type *Ty = nullptr) { + return nullptr; } - - /// PrintValue - Render the specified lattice value to the specified stream. - virtual void PrintValue(LatticeVal V, raw_ostream &OS); }; /// SparseSolver - This class is a general purpose solver for Sparse Conditional /// Propagation with a programmable lattice function. -template class SparseSolver { +template +class SparseSolver { /// LatticeFunc - This is the object that knows the lattice and how to /// compute transfer functions. - AbstractLatticeFunction *LatticeFunc; + AbstractLatticeFunction *LatticeFunc; - /// ValueState - Holds the lattice state associated with LLVM values. - DenseMap ValueState; + /// ValueState - Holds the LatticeVals associated with LatticeKeys. + DenseMap ValueState; /// BBExecutable - Holds the basic blocks that are executable. SmallPtrSet BBExecutable; @@ -124,28 +134,29 @@ std::set KnownFeasibleEdges; public: - explicit SparseSolver(AbstractLatticeFunction *Lattice) + explicit SparseSolver( + AbstractLatticeFunction *Lattice) : LatticeFunc(Lattice) {} SparseSolver(const SparseSolver &) = delete; SparseSolver &operator=(const SparseSolver &) = delete; /// Solve - Solve for constants and executable blocks. - void Solve(Function &F); + void Solve(); - void Print(Function &F, raw_ostream &OS) const; + void Print(raw_ostream &OS) const; /// getExistingValueState - Return the LatticeVal object corresponding to the /// given value from the ValueState map. If the value is not in the map, /// UntrackedVal is returned, unlike the getValueState method. - LatticeVal getExistingValueState(Value *V) const { - auto I = ValueState.find(V); + LatticeVal getExistingValueState(LatticeKey Key) const { + auto I = ValueState.find(Key); return I != ValueState.end() ? I->second : LatticeFunc->getUntrackedVal(); } /// getValueState - Return the LatticeVal object corresponding to the given /// value from the ValueState map. 
If the value is not in the map, its state /// is initialized. - LatticeVal getValueState(Value *V); + LatticeVal getValueState(LatticeKey Key); /// isEdgeFeasible - Return true if the control flow edge from the 'From' /// basic block to the 'To' basic block is currently feasible. If @@ -162,15 +173,16 @@ return BBExecutable.count(BB); } -private: - /// UpdateState - When the state for some instruction is potentially updated, - /// this function notices and adds I to the worklist if needed. - void UpdateState(Instruction &Inst, LatticeVal V); - /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. void MarkBlockExecutable(BasicBlock *BB); +private: + /// UpdateState - When the state of some LatticeKey is potentially updated to + /// the given LatticeVal, this function notices and adds the LLVM value + /// corresponding the key to the work list, if needed. + void UpdateState(LatticeKey Key, LatticeVal LV); + /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB /// work list if it is not already executable. 
void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest); @@ -189,9 +201,9 @@ // AbstractLatticeFunction Implementation //===----------------------------------------------------------------------===// -template -void AbstractLatticeFunction::PrintValue(LatticeVal V, - raw_ostream &OS) { +template +void AbstractLatticeFunction::PrintLatticeVal( + LatticeVal V, raw_ostream &OS) { if (V == UndefVal) OS << "undefined"; else if (V == OverdefinedVal) @@ -202,57 +214,59 @@ OS << "unknown lattice value"; } +template +void AbstractLatticeFunction::PrintLatticeKey( + LatticeKey Key, raw_ostream &OS) { + OS << "unknown lattice key"; +} + //===----------------------------------------------------------------------===// // SparseSolver Implementation //===----------------------------------------------------------------------===// -template -LatticeVal SparseSolver::getValueState(Value *V) { - auto I = ValueState.find(V); +template +LatticeVal +SparseSolver::getValueState(LatticeKey Key) { + auto I = ValueState.find(Key); if (I != ValueState.end()) return I->second; // Common case, in the map - LatticeVal LV; - if (LatticeFunc->IsUntrackedValue(V)) + if (LatticeFunc->IsUntrackedValue(Key)) return LatticeFunc->getUntrackedVal(); - else if (Constant *C = dyn_cast(V)) - LV = LatticeFunc->ComputeConstant(C); - else if (Argument *A = dyn_cast(V)) - LV = LatticeFunc->ComputeArgument(A); - else if (!isa(V)) - // All other non-instructions are overdefined. - LV = LatticeFunc->getOverdefinedVal(); - else - // All instructions are underdefined by default. - LV = LatticeFunc->getUndefVal(); + LatticeVal LV = LatticeFunc->ComputeLatticeVal(Key); // If this value is untracked, don't add it to the map. 
if (LV == LatticeFunc->getUntrackedVal()) return LV; - return ValueState[V] = LV; + return ValueState[Key] = LV; } -template -void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) { - auto I = ValueState.find(&Inst); - if (I != ValueState.end() && I->second == V) +template +void SparseSolver::UpdateState(LatticeKey Key, + LatticeVal LV) { + auto I = ValueState.find(Key); + if (I != ValueState.end() && I->second == LV) return; // No change. - // An update. Visit uses of I. - ValueState[&Inst] = V; - ValueWorkList.push_back(&Inst); + // Update the state of the given LatticeKey and add its corresponding LLVM + // value to the work list. + ValueState[Key] = LV; + if (Value *V = KeyInfo::getValueFromLatticeKey(Key)) + ValueWorkList.push_back(V); } -template -void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { +template +void SparseSolver::MarkBlockExecutable( + BasicBlock *BB) { + if (!BBExecutable.insert(BB).second) + return; DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); - BBExecutable.insert(BB); // Basic block is executable! BBWorkList.push_back(BB); // Add the block to the work list! } -template -void SparseSolver::markEdgeExecutable(BasicBlock *Source, - BasicBlock *Dest) { +template +void SparseSolver::markEdgeExecutable( + BasicBlock *Source, BasicBlock *Dest) { if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) return; // This edge is already known to be executable! 
@@ -270,8 +284,8 @@ } } -template -void SparseSolver::getFeasibleSuccessors( +template +void SparseSolver::getFeasibleSuccessors( TerminatorInst &TI, SmallVectorImpl &Succs, bool AggressiveUndef) { Succs.resize(TI.getNumSuccessors()); if (TI.getNumSuccessors() == 0) @@ -285,9 +299,11 @@ LatticeVal BCValue; if (AggressiveUndef) - BCValue = getValueState(BI->getCondition()); + BCValue = + getValueState(KeyInfo::getLatticeKeyFromValue(BI->getCondition())); else - BCValue = getExistingValueState(BI->getCondition()); + BCValue = getExistingValueState( + KeyInfo::getLatticeKeyFromValue(BI->getCondition())); if (BCValue == LatticeFunc->getOverdefinedVal() || BCValue == LatticeFunc->getUntrackedVal()) { @@ -300,7 +316,9 @@ if (BCValue == LatticeFunc->getUndefVal()) return; - Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); + Constant *C = + dyn_cast_or_null(LatticeFunc->GetValueFromLatticeVal( + BCValue, BI->getCondition()->getType())); if (!C || !isa(C)) { // Non-constant values can go either way. Succs[0] = Succs[1] = true; @@ -312,10 +330,8 @@ return; } - if (isa(TI)) { - // Invoke instructions successors are always executable. - // TODO: Could ask the lattice function if the value can throw. 
- Succs[0] = Succs[1] = true; + if (TI.isExceptional()) { + Succs.assign(Succs.size(), true); return; } @@ -327,9 +343,10 @@ SwitchInst &SI = cast(TI); LatticeVal SCValue; if (AggressiveUndef) - SCValue = getValueState(SI.getCondition()); + SCValue = getValueState(KeyInfo::getLatticeKeyFromValue(SI.getCondition())); else - SCValue = getExistingValueState(SI.getCondition()); + SCValue = getExistingValueState( + KeyInfo::getLatticeKeyFromValue(SI.getCondition())); if (SCValue == LatticeFunc->getOverdefinedVal() || SCValue == LatticeFunc->getUntrackedVal()) { @@ -342,7 +359,8 @@ if (SCValue == LatticeFunc->getUndefVal()) return; - Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); + Constant *C = dyn_cast_or_null(LatticeFunc->GetValueFromLatticeVal( + SCValue, SI.getCondition()->getType())); if (!C || !isa(C)) { // All destinations are executable! Succs.assign(TI.getNumSuccessors(), true); @@ -352,9 +370,9 @@ Succs[Case.getSuccessorIndex()] = true; } -template -bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To, - bool AggressiveUndef) { +template +bool SparseSolver::isEdgeFeasible( + BasicBlock *From, BasicBlock *To, bool AggressiveUndef) { SmallVector SuccFeasible; TerminatorInst *TI = From->getTerminator(); getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); @@ -366,8 +384,9 @@ return false; } -template -void SparseSolver::visitTerminatorInst(TerminatorInst &TI) { +template +void SparseSolver::visitTerminatorInst( + TerminatorInst &TI) { SmallVector SuccFeasible; getFeasibleSuccessors(TI, SuccFeasible, true); @@ -379,19 +398,22 @@ markEdgeExecutable(BB, TI.getSuccessor(i)); } -template -void SparseSolver::visitPHINode(PHINode &PN) { +template +void SparseSolver::visitPHINode(PHINode &PN) { // The lattice function may store more information on a PHINode than could be // computed from its incoming values. For example, SSI form stores its sigma // functions as PHINodes with a single incoming value. 
if (LatticeFunc->IsSpecialCasedPHI(&PN)) { - LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this); - if (IV != LatticeFunc->getUntrackedVal()) - UpdateState(PN, IV); + DenseMap ChangedValues; + LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); + for (auto &ChangedValue : ChangedValues) + if (ChangedValue.second != LatticeFunc->getUntrackedVal()) + UpdateState(ChangedValue.first, ChangedValue.second); return; } - LatticeVal PNIV = getValueState(&PN); + LatticeKey Key = KeyInfo::getLatticeKeyFromValue(&PN); + LatticeVal PNIV = getValueState(Key); LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); // If this value is already overdefined (common) just return. @@ -401,7 +423,7 @@ // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, // and slow us down a lot. Just mark them overdefined. if (PN.getNumIncomingValues() > 64) { - UpdateState(PN, Overdefined); + UpdateState(Key, Overdefined); return; } @@ -414,7 +436,8 @@ continue; // Merge in this value. - LatticeVal OpVal = getValueState(PN.getIncomingValue(i)); + LatticeVal OpVal = + getValueState(KeyInfo::getLatticeKeyFromValue(PN.getIncomingValue(i))); if (OpVal != PNIV) PNIV = LatticeFunc->MergeValues(PNIV, OpVal); @@ -423,11 +446,11 @@ } // Update the PHI with the compute value, which is the merge of the inputs. - UpdateState(PN, PNIV); + UpdateState(Key, PNIV); } -template -void SparseSolver::visitInst(Instruction &I) { +template +void SparseSolver::visitInst(Instruction &I) { // PHIs are handled by the propagation logic, they are never passed into the // transfer functions. if (PHINode *PN = dyn_cast(&I)) @@ -435,17 +458,18 @@ // Otherwise, ask the transfer function what the result is. If this is // something that we care about, remember it. 
- LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this); - if (IV != LatticeFunc->getUntrackedVal()) - UpdateState(I, IV); + DenseMap ChangedValues; + LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); + for (auto &ChangedValue : ChangedValues) + if (ChangedValue.second != LatticeFunc->getUntrackedVal()) + UpdateState(ChangedValue.first, ChangedValue.second); if (TerminatorInst *TI = dyn_cast(&I)) visitTerminatorInst(*TI); } -template void SparseSolver::Solve(Function &F) { - MarkBlockExecutable(&F.getEntryBlock()); - +template +void SparseSolver::Solve() { // Process the work lists until they are empty! while (!BBWorkList.empty() || !ValueWorkList.empty()) { // Process the value work list. @@ -478,22 +502,24 @@ } } -template -void SparseSolver::Print(Function &F, raw_ostream &OS) const { - OS << "\nFUNCTION: " << F.getName() << "\n"; - for (auto &BB : F) { - if (!BBExecutable.count(&BB)) - OS << "INFEASIBLE: "; - OS << "\t"; - if (BB.hasName()) - OS << BB.getName() << ":\n"; - else - OS << "; anon bb\n"; - for (auto &I : BB) { - LatticeFunc->PrintValue(getExistingValueState(&I), OS); - OS << I << "\n"; - } +template +void SparseSolver::Print( + raw_ostream &OS) const { + if (ValueState.empty()) + return; + LatticeKey Key; + LatticeVal LV; + + OS << "ValueState:\n"; + for (auto &Entry : ValueState) { + std::tie(Key, LV) = Entry; + if (LV == LatticeFunc->getUntrackedVal()) + continue; + OS << "\t"; + LatticeFunc->PrintLatticeVal(LV, OS); + OS << ": "; + LatticeFunc->PrintLatticeKey(Key, OS); OS << "\n"; } } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def llvm-toolchain-snapshot-6.0~svn316003/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def 2017-10-17 14:41:11.000000000 +0000 @@ -14,3 +14,4 
@@ ELF_RELOC(R_AMDGPU_GOTPCREL32_HI, 9) ELF_RELOC(R_AMDGPU_REL32_LO, 10) ELF_RELOC(R_AMDGPU_REL32_HI, 11) +ELF_RELOC(R_AMDGPU_RELATIVE64, 13) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/CodeGen/FaultMaps.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/CodeGen/FaultMaps.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/CodeGen/FaultMaps.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/CodeGen/FaultMaps.h 2017-10-17 14:41:11.000000000 +0000 @@ -39,6 +39,9 @@ void recordFaultingOp(FaultKind FaultTy, const MCSymbol *HandlerLabel); void serializeToFaultMapSection(); + void reset() { + FunctionInfos.clear(); + } private: static const char *WFMP; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/CodeGen/GlobalISel/InstructionSelector.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/CodeGen/GlobalISel/InstructionSelector.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/CodeGen/GlobalISel/InstructionSelector.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/CodeGen/GlobalISel/InstructionSelector.h 2017-10-17 14:41:11.000000000 +0000 @@ -117,6 +117,11 @@ /// - OpIdx - Operand index /// - Expected type GIM_CheckType, + /// Check the type of a pointer to any address space. + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - SizeInBits - The size of the pointer value in bits. 
+ GIM_CheckPointerToAny, /// Check the register bank for the specified operand /// - InsnID - Instruction ID /// - OpIdx - Operand index @@ -291,7 +296,7 @@ const I64ImmediatePredicateFn *I64ImmPredicateFns; const APIntImmediatePredicateFn *APIntImmPredicateFns; const APFloatImmediatePredicateFn *APFloatImmPredicateFns; - const std::vector ComplexPredicates; + const ComplexMatcherMemFn *ComplexPredicates; }; protected: @@ -336,6 +341,12 @@ bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; + /// Return true if the specified operand is a G_GEP with a G_CONSTANT on the + /// right-hand side. GlobalISel's separation of pointer and integer types + /// means that we don't need to worry about G_OR with equivalent semantics. + bool isBaseWithConstantOffset(const MachineOperand &Root, + const MachineRegisterInfo &MRI) const; + bool isObviouslySafeToFold(MachineInstr &MI) const; }; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h 2017-10-17 14:41:11.000000000 +0000 @@ -244,7 +244,31 @@ } break; } + case GIM_CheckPointerToAny: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t SizeInBits = MatchTable[CurrentIdx++]; + + DEBUG(dbgs() << CurrentIdx << ": GIM_CheckPointerToAny(MIs[" << InsnID + << "]->getOperand(" << OpIdx + << "), SizeInBits=" << SizeInBits << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + // iPTR must be looked up in the target. 
+ if (SizeInBits == 0) { + MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent(); + SizeInBits = MF->getDataLayout().getPointerSizeInBits(0); + } + assert(SizeInBits != 0 && "Pointer size must be known"); + + const LLT &Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); + if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } case GIM_CheckRegBankForClass: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/CodeGen/MachineRegisterInfo.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/CodeGen/MachineRegisterInfo.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/CodeGen/MachineRegisterInfo.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/CodeGen/MachineRegisterInfo.h 2017-10-17 14:41:11.000000000 +0000 @@ -842,6 +842,10 @@ livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } + ArrayRef> liveins() const { + return LiveIns; + } + bool isLiveIn(unsigned Reg) const; /// getLiveInPhysReg - If VReg is a live-in virtual register, return the diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/IR/LLVMContext.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/IR/LLVMContext.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/IR/LLVMContext.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/IR/LLVMContext.h 2017-10-17 14:41:11.000000000 +0000 @@ -100,6 +100,7 @@ MD_section_prefix = 20, // "section_prefix" MD_absolute_symbol = 21, // "absolute_symbol" MD_associated = 22, // "associated" + MD_callees = 23, // "callees" }; /// Known operand bundle tag IDs, which always have the same value. 
All diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/IR/MDBuilder.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/IR/MDBuilder.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/IR/MDBuilder.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/IR/MDBuilder.h 2017-10-17 14:41:11.000000000 +0000 @@ -85,6 +85,14 @@ MDNode *createRange(Constant *Lo, Constant *Hi); //===------------------------------------------------------------------===// + // Callees metadata. + //===------------------------------------------------------------------===// + + /// \brief Return metadata indicating the possible callees of indirect + /// calls. + MDNode *createCallees(ArrayRef Callees); + + //===------------------------------------------------------------------===// // AA metadata. //===------------------------------------------------------------------===// diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/MC/MCInst.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/MC/MCInst.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/MC/MCInst.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/MC/MCInst.h 2017-10-17 14:41:11.000000000 +0000 @@ -160,6 +160,10 @@ unsigned Opcode = 0; SMLoc Loc; SmallVector Operands; + // These flags could be used to pass some info from one target subcomponent + // to another, for example, from disassembler to asm printer. The values of + // the flags have any sense on target level only (e.g. prefixes on x86). 
+ unsigned Flags = 0; public: MCInst() = default; @@ -167,6 +171,9 @@ void setOpcode(unsigned Op) { Opcode = Op; } unsigned getOpcode() const { return Opcode; } + void setFlags(unsigned F) { Flags = F; } + unsigned getFlags() const { return Flags; } + void setLoc(SMLoc loc) { Loc = loc; } SMLoc getLoc() const { return Loc; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/GVN.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/GVN.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/GVN.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/GVN.h 2017-10-17 14:41:11.000000000 +0000 @@ -18,7 +18,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -28,7 +27,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" -#include "llvm/Transforms/Utils/OrderedInstructions.h" #include #include #include @@ -158,11 +156,7 @@ AssumptionCache *AC; SetVector DeadBlocks; OptimizationRemarkEmitter *ORE; - // Maps a block to the topmost instruction with implicit control flow in it. 
- DenseMap - FirstImplicitControlFlowInsts; - OrderedInstructions *OI; ValueTable VN; /// A mapping from value numbers to lists of Value*'s that @@ -274,7 +268,6 @@ BasicBlock *Curr, unsigned int ValNo); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); - void fillImplicitControlFlowInfo(ReversePostOrderTraversal &RPOT); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/IndVarSimplify.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/IndVarSimplify.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/IndVarSimplify.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/IndVarSimplify.h 2017-10-17 14:41:11.000000000 +0000 @@ -15,17 +15,20 @@ #ifndef LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H #define LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Loop; +class LPMUpdater; + class IndVarSimplifyPass : public PassInfoMixin { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/LoopDistribute.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/LoopDistribute.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/LoopDistribute.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/LoopDistribute.h 2017-10-17 14:41:11.000000000 +0000 @@ -21,10 +21,13 @@ namespace llvm { +class 
Function; + class LoopDistributePass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h 2017-10-17 14:41:11.000000000 +0000 @@ -1,4 +1,4 @@ -//===- LoopIdiomRecognize.h - Loop Idiom Recognize Pass -------*- C++ -*-===// +//===- LoopIdiomRecognize.h - Loop Idiom Recognize Pass ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,18 +16,21 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H #define LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Loop; +class LPMUpdater; + /// Performs Loop Idiom Recognize Pass. 
class LoopIdiomRecognizePass : public PassInfoMixin { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/LoopInstSimplify.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/LoopInstSimplify.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/LoopInstSimplify.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/LoopInstSimplify.h 2017-10-17 14:41:11.000000000 +0000 @@ -1,4 +1,4 @@ -//===- LoopInstSimplify.h - Loop Inst Simplify Pass -------*- C++ -*-===// +//===- LoopInstSimplify.h - Loop Inst Simplify Pass -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,18 +14,21 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPINSTSIMPLIFY_H #define LLVM_TRANSFORMS_SCALAR_LOOPINSTSIMPLIFY_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Loop; +class LPMUpdater; + /// Performs Loop Inst Simplify Pass. 
class LoopInstSimplifyPass : public PassInfoMixin { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPINSTSIMPLIFY_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/LoopLoadElimination.h llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/LoopLoadElimination.h --- llvm-toolchain-snapshot-6.0~svn315865/include/llvm/Transforms/Scalar/LoopLoadElimination.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/include/llvm/Transforms/Scalar/LoopLoadElimination.h 2017-10-17 14:41:11.000000000 +0000 @@ -1,4 +1,4 @@ -//===---- LoopLoadElimination.h ---------------------------------*- C++ -*-===// +//===- LoopLoadElimination.h ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,11 +6,12 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This header defines the LoopLoadEliminationPass object. This pass forwards /// loaded values around loop backedges to allow their use in subsequent /// iterations. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_SCALAR_LOOPLOADELIMINATION_H @@ -20,11 +21,14 @@ namespace llvm { +class Function; + /// Pass to forward loads in a loop around the backedge to subsequent /// iterations. 
struct LoopLoadEliminationPass : public PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPLOADELIMINATION_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Analysis/ScalarEvolution.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Analysis/ScalarEvolution.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Analysis/ScalarEvolution.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Analysis/ScalarEvolution.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -6293,7 +6293,6 @@ BackedgeTakenInfo Result = computeBackedgeTakenCount(L, /*AllowPredicates=*/true); - addToLoopUseLists(Result, L); return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result); } @@ -6369,7 +6368,6 @@ // recusive call to getBackedgeTakenInfo (on a different // loop), which would invalidate the iterator computed // earlier. - addToLoopUseLists(Result, L); return BackedgeTakenCounts.find(L)->second = std::move(Result); } @@ -6407,14 +6405,8 @@ auto LoopUsersItr = LoopUsers.find(CurrL); if (LoopUsersItr != LoopUsers.end()) { - for (auto LoopOrSCEV : LoopUsersItr->second) { - if (auto *S = LoopOrSCEV.dyn_cast()) - forgetMemoizedResults(S); - else { - BackedgeTakenCounts.erase(LoopOrSCEV.get()); - PredicatedBackedgeTakenCounts.erase(LoopOrSCEV.get()); - } - } + for (auto *S : LoopUsersItr->second) + forgetMemoizedResults(S); LoopUsers.erase(LoopUsersItr); } @@ -6559,34 +6551,6 @@ return false; } -static void findUsedLoopsInSCEVExpr(const SCEV *S, - SmallPtrSetImpl &Result) { - struct FindUsedLoops { - SmallPtrSetImpl &LoopsUsed; - FindUsedLoops(SmallPtrSetImpl &LoopsUsed) - : LoopsUsed(LoopsUsed) {} - bool follow(const SCEV *S) { - if (auto *AR = dyn_cast(S)) - LoopsUsed.insert(AR->getLoop()); - return true; - } - - bool isDone() const { return false; } - }; - FindUsedLoops F(Result); - SCEVTraversal(F).visitAll(S); -} - -void 
ScalarEvolution::BackedgeTakenInfo::findUsedLoops( - ScalarEvolution &SE, SmallPtrSetImpl &Result) const { - if (auto *S = getMax()) - if (S != SE.getCouldNotCompute()) - findUsedLoopsInSCEVExpr(S, Result); - for (auto &ENT : ExitNotTaken) - if (ENT.ExactNotTaken != SE.getCouldNotCompute()) - findUsedLoopsInSCEVExpr(ENT.ExactNotTaken, Result); -} - ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) : ExactNotTaken(E), MaxNotTaken(E) { assert((isa(MaxNotTaken) || @@ -9725,11 +9689,11 @@ return getUDivExpr(Delta, Step); } -const SCEV *ScalarEvolution::computeMaxBECount(const SCEV *Start, - const SCEV *Stride, - const SCEV *End, - unsigned BitWidth, - bool IsSigned) { +const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, + const SCEV *Stride, + const SCEV *End, + unsigned BitWidth, + bool IsSigned) { assert(!isKnownNonPositive(Stride) && "Stride is expected strictly positive!"); @@ -9861,7 +9825,7 @@ // bound of the loop (RHS), and the fact that IV does not overflow (which is // checked above). 
if (!isLoopInvariant(RHS, L)) { - const SCEV *MaxBECount = computeMaxBECount( + const SCEV *MaxBECount = computeMaxBECountForLT( Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, false /*MaxOrZero*/, Predicates); @@ -9898,8 +9862,8 @@ MaxBECount = BECountIfBackedgeTaken; MaxOrZero = true; } else { - MaxBECount = computeMaxBECount(Start, Stride, RHS, - getTypeSizeInBits(LHS->getType()), IsSigned); + MaxBECount = computeMaxBECountForLT( + Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); } if (isa(MaxBECount) && @@ -11070,6 +11034,21 @@ ++I; } + auto RemoveSCEVFromBackedgeMap = + [S, this](DenseMap &Map) { + for (auto I = Map.begin(), E = Map.end(); I != E;) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + Map.erase(I++); + } else + ++I; + } + }; + + RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); + RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); + // TODO: There is a suspicion that we only need to do it when there is a // SCEVUnknown somewhere inside S. Need to check this. 
if (EraseExitLimit) @@ -11079,19 +11058,22 @@ } void ScalarEvolution::addToLoopUseLists(const SCEV *S) { - SmallPtrSet LoopsUsed; - findUsedLoopsInSCEVExpr(S, LoopsUsed); - for (auto *L : LoopsUsed) - LoopUsers[L].push_back({S}); -} + struct FindUsedLoops { + SmallPtrSet LoopsUsed; + bool follow(const SCEV *S) { + if (auto *AR = dyn_cast(S)) + LoopsUsed.insert(AR->getLoop()); + return true; + } -void ScalarEvolution::addToLoopUseLists( - const ScalarEvolution::BackedgeTakenInfo &BTI, const Loop *L) { - SmallPtrSet LoopsUsed; - BTI.findUsedLoops(*this, LoopsUsed); + bool isDone() const { return false; } + }; + + FindUsedLoops F; + SCEVTraversal(F).visitAll(S); - for (auto *UsedL : LoopsUsed) - LoopUsers[UsedL].push_back({L}); + for (auto *L : F.LoopsUsed) + LoopUsers[L].push_back(S); } void ScalarEvolution::verify() const { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Analysis/ValueTracking.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Analysis/ValueTracking.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Analysis/ValueTracking.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Analysis/ValueTracking.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -789,14 +789,14 @@ } } -// Compute known bits from a shift operator, including those with a -// non-constant shift amount. Known is the outputs of this function. Known2 is a -// pre-allocated temporary with the/ same bit width as Known. KZF and KOF are -// operator-specific functors that, given the known-zero or known-one bits -// respectively, and a shift amount, compute the implied known-zero or known-one -// bits of the shift operator's result respectively for that shift amount. The -// results from calling KZF and KOF are conservatively combined for all -// permitted shift amounts. +/// Compute known bits from a shift operator, including those with a +/// non-constant shift amount. Known is the output of this function. 
Known2 is a +/// pre-allocated temporary with the same bit width as Known. KZF and KOF are +/// operator-specific functors that, given the known-zero or known-one bits +/// respectively, and a shift amount, compute the implied known-zero or +/// known-one bits of the shift operator's result respectively for that shift +/// amount. The results from calling KZF and KOF are conservatively combined for +/// all permitted shift amounts. static void computeKnownBitsFromShiftOperator( const Operator *I, KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q, @@ -847,8 +847,7 @@ // Early exit if we can't constrain any well-defined shift amount. if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) && !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) { - ShifterOperandIsNonZero = - isKnownNonZero(I->getOperand(1), Depth + 1, Q); + ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q); if (!*ShifterOperandIsNonZero) return; } @@ -1095,7 +1094,7 @@ break; } case Instruction::LShr: { - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + // (lshr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) { APInt KZResult = KnownZero.lshr(ShiftAmt); // High bits known zero. @@ -4084,6 +4083,14 @@ Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS) { + assert(!ICmpInst::isEquality(Pred) && "Expected not equality predicate only!"); + + // First, check if select has inverse order of what we will check below: + if (CmpRHS == FalseVal) { + std::swap(TrueVal, FalseVal); + Pred = CmpInst::getInversePredicate(Pred); + } + // Assume success. If there's no match, callers should not use these anyway. 
LHS = TrueVal; RHS = FalseVal; @@ -4096,26 +4103,30 @@ // (X SMAX(SMIN(X, C2), C1) if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && - C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) + C1->slt(*C2) && + (Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE)) return {SPF_SMAX, SPNB_NA, false}; // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1) if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) && - C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) + C1->sgt(*C2) && + (Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE)) return {SPF_SMIN, SPNB_NA, false}; // (X UMAX(UMIN(X, C2), C1) if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) && - C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) + C1->ult(*C2) && + (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE)) return {SPF_UMAX, SPNB_NA, false}; // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1) if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) && - C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) + C1->ugt(*C2) && + (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE)) return {SPF_UMIN, SPNB_NA, false}; } - if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) + if (!CmpInst::isSigned(Pred)) return {SPF_UNKNOWN, SPNB_NA, false}; // Z = X -nsw Y @@ -4123,14 +4134,18 @@ // (X (Z SMAX(Z, 0) if (match(TrueVal, m_Zero()) && match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) - return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; + return {(Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE) ? SPF_SMIN + : SPF_SMAX, + SPNB_NA, false}; // Z = X -nsw Y // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0) // (X (Z SMIN(Z, 0) if (match(FalseVal, m_Zero()) && match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) - return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; + return {(Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE) ? 
SPF_SMAX + : SPF_SMIN, + SPNB_NA, false}; if (!match(CmpRHS, m_APInt(C1))) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4142,14 +4157,15 @@ // Is the sign bit set? // (X (X >u MAXVAL) ? X : MAXVAL ==> UMAX // (X (X >u MAXVAL) ? MAXVAL : X ==> UMIN - if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) + if ((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE) && *C1 == 0 && + C2->isMaxSignedValue()) return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; // Is the sign bit clear? // (X >s -1) ? MINVAL : X ==> (X UMAX // (X >s -1) ? X : MINVAL ==> (X UMIN - if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() && - C2->isMinSignedValue()) + if ((Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE) && + C1->isAllOnesValue() && C2->isMinSignedValue()) return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; } @@ -4158,13 +4174,17 @@ // (X (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C) if (match(TrueVal, m_Not(m_Specific(CmpLHS))) && match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) - return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; + return {(Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE) ? SPF_SMIN + : SPF_SMAX, + SPNB_NA, false}; // (X >s C) ? ~C : ~X ==> (~X SMAX(~C, ~X) // (X (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X) if (match(FalseVal, m_Not(m_Specific(CmpLHS))) && match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) - return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; + return {(Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE) ? 
SPF_SMAX + : SPF_SMIN, + SPNB_NA, false}; return {SPF_UNKNOWN, SPNB_NA, false}; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/GlobalISel/InstructionSelector.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/GlobalISel/InstructionSelector.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/GlobalISel/InstructionSelector.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/GlobalISel/InstructionSelector.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -29,7 +30,7 @@ using namespace llvm; InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) - : Renderers(MaxRenderers, {}), MIs() {} + : Renderers(MaxRenderers), MIs() {} InstructionSelector::InstructionSelector() = default; @@ -98,6 +99,23 @@ return false; } +bool InstructionSelector::isBaseWithConstantOffset( + const MachineOperand &Root, const MachineRegisterInfo &MRI) const { + if (!Root.isReg()) + return false; + + MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); + if (RootI->getOpcode() != TargetOpcode::G_GEP) + return false; + + MachineOperand &RHS = RootI->getOperand(2); + MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg()); + if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT) + return false; + + return true; +} + bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const { return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() && MI.implicit_operands().begin() == MI.implicit_operands().end(); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/ImplicitNullChecks.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/ImplicitNullChecks.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/ImplicitNullChecks.cpp 
2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/ImplicitNullChecks.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -369,7 +369,7 @@ // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() && - Offset < PageSize)) + -PageSize < Offset && Offset < PageSize)) return SR_Unsuitable; // Finally, check whether the current memory access aliases with previous one. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/MachineCopyPropagation.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/MachineCopyPropagation.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/MachineCopyPropagation.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/MachineCopyPropagation.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -286,7 +286,7 @@ // it's no longer available for copy propagation. RegList &DestList = SrcMap[Src]; if (!is_contained(DestList, Def)) - DestList.push_back(Def); + DestList.push_back(Def); continue; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/MIRPrinter.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/MIRPrinter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/MIRPrinter.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/MIRPrinter.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -297,11 +297,11 @@ } // Print the live ins. 
- for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) { + for (std::pair LI : RegInfo.liveins()) { yaml::MachineFunctionLiveIn LiveIn; - printReg(I->first, LiveIn.Register, TRI); - if (I->second) - printReg(I->second, LiveIn.VirtualRegister, TRI); + printReg(LI.first, LiveIn.Register, TRI); + if (LI.second) + printReg(LI.second, LiveIn.VirtualRegister, TRI); MF.LiveIns.push_back(LiveIn); } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -40,8 +40,10 @@ SDValue Res = SDValue(); // See if the target wants to custom expand this node. - if (CustomLowerNode(N, N->getValueType(ResNo), true)) + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + DEBUG(dbgs() << "Node has been custom expanded, done\n"); return; + } switch (N->getOpcode()) { default: @@ -885,8 +887,10 @@ DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); - if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { + DEBUG(dbgs() << "Node has been custom lowered, done\n"); return false; + } switch (N->getOpcode()) { default: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -226,15 +226,21 @@ 
assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); - if (IgnoreNodeResults(N)) + DEBUG(dbgs() << "Legalizing node: "; N->dump()); + if (IgnoreNodeResults(N)) { + DEBUG(dbgs() << "Ignoring node results\n"); goto ScanOperands; + } // Scan the values produced by the node, checking to see if any result // types are illegal. for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); + DEBUG(dbgs() << "Analyzing result type: " << + ResultVT.getEVTString() << "\n"); switch (getTypeAction(ResultVT)) { case TargetLowering::TypeLegal: + DEBUG(dbgs() << "Legal result type\n"); break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results @@ -291,9 +297,12 @@ if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - EVT OpVT = N->getOperand(i).getValueType(); + const auto Op = N->getOperand(i); + DEBUG(dbgs() << "Analyzing operand: "; Op.dump()); + EVT OpVT = Op.getValueType(); switch (getTypeAction(OpVT)) { case TargetLowering::TypeLegal: + DEBUG(dbgs() << "Legal operand\n"); continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -494,10 +494,9 @@ DenseMap LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) - for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), - E = RegInfo->livein_end(); LI != E; ++LI) - if (LI->second) - LiveInMap.insert(std::make_pair(LI->first, 
LI->second)); + for (std::pair LI : RegInfo->liveins()) + if (LI.second) + LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/CodeGen/SelectionDAG/TargetLowering.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/CodeGen/SelectionDAG/TargetLowering.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -469,7 +469,7 @@ } NewOps.push_back(User->getOperand(i)); } - TLO.DAG.UpdateNodeOperands(User, NewOps); + User = TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -163,6 +163,7 @@ case DW_CFA_same_value: case DW_CFA_def_cfa_register: case DW_CFA_def_cfa_offset: + case DW_CFA_GNU_args_size: // Operands: ULEB128 addInstruction(Opcode, Data.getULEB128(Offset)); break; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h llvm-toolchain-snapshot-6.0~svn316003/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h 2017-10-15 17:43:15.000000000 +0000 +++ 
llvm-toolchain-snapshot-6.0~svn316003/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h 2017-10-17 14:41:12.000000000 +0000 @@ -209,7 +209,7 @@ DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_I386_SECREL Value: " << RE.Addend << '\n'); - writeBytesUnaligned(RE.Addend, Target, 2); + writeBytesUnaligned(RE.Addend, Target, 4); break; default: llvm_unreachable("unsupported relocation type"); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/afl/afl_driver.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/afl/afl_driver.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/afl/afl_driver.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/afl/afl_driver.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,335 +0,0 @@ -//===- afl_driver.cpp - a glue between AFL and libFuzzer --------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -//===----------------------------------------------------------------------===// - -/* This file allows to fuzz libFuzzer-style target functions - (LLVMFuzzerTestOneInput) with AFL using AFL's persistent (in-process) mode. - -Usage: -################################################################################ -cat << EOF > test_fuzzer.cc -#include -#include -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - if (size > 0 && data[0] == 'H') - if (size > 1 && data[1] == 'I') - if (size > 2 && data[2] == '!') - __builtin_trap(); - return 0; -} -EOF -# Build your target with -fsanitize-coverage=trace-pc-guard using fresh clang. -clang -g -fsanitize-coverage=trace-pc-guard test_fuzzer.cc -c -# Build afl-llvm-rt.o.c from the AFL distribution. -clang -c -w $AFL_HOME/llvm_mode/afl-llvm-rt.o.c -# Build this file, link it with afl-llvm-rt.o.o and the target code. 
-clang++ afl_driver.cpp test_fuzzer.o afl-llvm-rt.o.o -# Run AFL: -rm -rf IN OUT; mkdir IN OUT; echo z > IN/z; -$AFL_HOME/afl-fuzz -i IN -o OUT ./a.out -################################################################################ -Environment Variables: -There are a few environment variables that can be set to use features that -afl-fuzz doesn't have. - -AFL_DRIVER_STDERR_DUPLICATE_FILENAME: Setting this *appends* stderr to the file -specified. If the file does not exist, it is created. This is useful for getting -stack traces (when using ASAN for example) or original error messages on hard to -reproduce bugs. - -AFL_DRIVER_EXTRA_STATS_FILENAME: Setting this causes afl_driver to write extra -statistics to the file specified. Currently these are peak_rss_mb -(the peak amount of virtual memory used in MB) and slowest_unit_time_secs. If -the file does not exist it is created. If the file does exist then -afl_driver assumes it was restarted by afl-fuzz and will try to read old -statistics from the file. If that fails then the process will quit. - -*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -// Platform detection. Copied from FuzzerInternal.h -#ifdef __linux__ -#define LIBFUZZER_LINUX 1 -#define LIBFUZZER_APPLE 0 -#elif __APPLE__ -#define LIBFUZZER_LINUX 0 -#define LIBFUZZER_APPLE 1 -#else -#error "Support for your platform has not been implemented" -#endif - -// Used to avoid repeating error checking boilerplate. If cond is false, a -// fatal error has occured in the program. In this event print error_message -// to stderr and abort(). Otherwise do nothing. Note that setting -// AFL_DRIVER_STDERR_DUPLICATE_FILENAME may cause error_message to be appended -// to the file as well, if the error occurs after the duplication is performed. 
-#define CHECK_ERROR(cond, error_message) \ - if (!(cond)) { \ - fprintf(stderr, (error_message)); \ - abort(); \ - } - -// libFuzzer interface is thin, so we don't include any libFuzzer headers. -extern "C" { -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); -__attribute__((weak)) int LLVMFuzzerInitialize(int *argc, char ***argv); -} - -// Notify AFL about persistent mode. -static volatile char AFL_PERSISTENT[] = "##SIG_AFL_PERSISTENT##"; -extern "C" int __afl_persistent_loop(unsigned int); -static volatile char suppress_warning2 = AFL_PERSISTENT[0]; - -// Notify AFL about deferred forkserver. -static volatile char AFL_DEFER_FORKSVR[] = "##SIG_AFL_DEFER_FORKSRV##"; -extern "C" void __afl_manual_init(); -static volatile char suppress_warning1 = AFL_DEFER_FORKSVR[0]; - -// Input buffer. -static const size_t kMaxAflInputSize = 1 << 20; -static uint8_t AflInputBuf[kMaxAflInputSize]; - -// Variables we need for writing to the extra stats file. -static FILE *extra_stats_file = NULL; -static uint32_t previous_peak_rss = 0; -static time_t slowest_unit_time_secs = 0; -static const int kNumExtraStats = 2; -static const char *kExtraStatsFormatString = "peak_rss_mb : %u\n" - "slowest_unit_time_sec : %u\n"; - -// Copied from FuzzerUtil.cpp. 
-size_t GetPeakRSSMb() { - struct rusage usage; - if (getrusage(RUSAGE_SELF, &usage)) - return 0; - if (LIBFUZZER_LINUX) { - // ru_maxrss is in KiB - return usage.ru_maxrss >> 10; - } else if (LIBFUZZER_APPLE) { - // ru_maxrss is in bytes - return usage.ru_maxrss >> 20; - } - assert(0 && "GetPeakRSSMb() is not implemented for your platform"); - return 0; -} - -// Based on SetSigaction in FuzzerUtil.cpp -static void SetSigaction(int signum, - void (*callback)(int, siginfo_t *, void *)) { - struct sigaction sigact; - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_sigaction = callback; - if (sigaction(signum, &sigact, 0)) { - fprintf(stderr, "libFuzzer: sigaction failed with %d\n", errno); - exit(1); - } -} - -// Write extra stats to the file specified by the user. If none is specified -// this function will never be called. -static void write_extra_stats() { - uint32_t peak_rss = GetPeakRSSMb(); - - if (peak_rss < previous_peak_rss) - peak_rss = previous_peak_rss; - - int chars_printed = fprintf(extra_stats_file, kExtraStatsFormatString, - peak_rss, slowest_unit_time_secs); - - CHECK_ERROR(chars_printed != 0, "Failed to write extra_stats_file"); - - CHECK_ERROR(fclose(extra_stats_file) == 0, - "Failed to close extra_stats_file"); -} - -// Call write_extra_stats before we exit. -static void crash_handler(int, siginfo_t *, void *) { - // Make sure we don't try calling write_extra_stats again if we crashed while - // trying to call it. - static bool first_crash = true; - CHECK_ERROR(first_crash, - "Crashed in crash signal handler. This is a bug in the fuzzer."); - - first_crash = false; - write_extra_stats(); -} - -// If the user has specified an extra_stats_file through the environment -// variable AFL_DRIVER_EXTRA_STATS_FILENAME, then perform necessary set up -// to write stats to it on exit. If no file is specified, do nothing. Otherwise -// install signal and exit handlers to write to the file when the process exits. 
-// Then if the file doesn't exist create it and set extra stats to 0. But if it -// does exist then read the initial values of the extra stats from the file -// and check that the file is writable. -static void maybe_initialize_extra_stats() { - // If AFL_DRIVER_EXTRA_STATS_FILENAME isn't set then we have nothing to do. - char *extra_stats_filename = getenv("AFL_DRIVER_EXTRA_STATS_FILENAME"); - if (!extra_stats_filename) - return; - - // Open the file and find the previous peak_rss_mb value. - // This is necessary because the fuzzing process is restarted after N - // iterations are completed. So we may need to get this value from a previous - // process to be accurate. - extra_stats_file = fopen(extra_stats_filename, "r"); - - // If extra_stats_file already exists: read old stats from it. - if (extra_stats_file) { - int matches = fscanf(extra_stats_file, kExtraStatsFormatString, - &previous_peak_rss, &slowest_unit_time_secs); - - // Make sure we have read a real extra stats file and that we have used it - // to set slowest_unit_time_secs and previous_peak_rss. - CHECK_ERROR(matches == kNumExtraStats, "Extra stats file is corrupt"); - - CHECK_ERROR(fclose(extra_stats_file) == 0, "Failed to close file"); - - // Now open the file for writing. - extra_stats_file = fopen(extra_stats_filename, "w"); - CHECK_ERROR(extra_stats_file, - "Failed to open extra stats file for writing"); - } else { - // Looks like this is the first time in a fuzzing job this is being called. - extra_stats_file = fopen(extra_stats_filename, "w+"); - CHECK_ERROR(extra_stats_file, "failed to create extra stats file"); - } - - // Make sure that crash_handler gets called on any kind of fatal error. 
- int crash_signals[] = {SIGSEGV, SIGBUS, SIGABRT, SIGILL, SIGFPE, SIGINT, - SIGTERM}; - - const size_t num_signals = sizeof(crash_signals) / sizeof(crash_signals[0]); - - for (size_t idx = 0; idx < num_signals; idx++) - SetSigaction(crash_signals[idx], crash_handler); - - // Make sure it gets called on other kinds of exits. - atexit(write_extra_stats); -} - -// If the user asks us to duplicate stderr, then do it. -static void maybe_duplicate_stderr() { - char* stderr_duplicate_filename = - getenv("AFL_DRIVER_STDERR_DUPLICATE_FILENAME"); - - if (!stderr_duplicate_filename) - return; - - FILE* stderr_duplicate_stream = - freopen(stderr_duplicate_filename, "a+", stderr); - - if (!stderr_duplicate_stream) { - fprintf( - stderr, - "Failed to duplicate stderr to AFL_DRIVER_STDERR_DUPLICATE_FILENAME"); - abort(); - } -} - -// Define LLVMFuzzerMutate to avoid link failures for targets that use it -// with libFuzzer's LLVMFuzzerCustomMutator. -extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { - assert(false && "LLVMFuzzerMutate should not be called from afl_driver"); - return 0; -} - -// Execute any files provided as parameters. -int ExecuteFilesOnyByOne(int argc, char **argv) { - for (int i = 1; i < argc; i++) { - std::ifstream in(argv[i]); - in.seekg(0, in.end); - size_t length = in.tellg(); - in.seekg (0, in.beg); - std::cout << "Reading " << length << " bytes from " << argv[i] << std::endl; - // Allocate exactly length bytes so that we reliably catch buffer overflows. 
- std::vector bytes(length); - in.read(bytes.data(), bytes.size()); - assert(in); - LLVMFuzzerTestOneInput(reinterpret_cast(bytes.data()), - bytes.size()); - std::cout << "Execution successfull" << std::endl; - } - return 0; -} - -int main(int argc, char **argv) { - fprintf(stderr, - "======================= INFO =========================\n" - "This binary is built for AFL-fuzz.\n" - "To run the target function on individual input(s) execute this:\n" - " %s < INPUT_FILE\n" - "or\n" - " %s INPUT_FILE1 [INPUT_FILE2 ... ]\n" - "To fuzz with afl-fuzz execute this:\n" - " afl-fuzz [afl-flags] %s [-N]\n" - "afl-fuzz will run N iterations before " - "re-spawning the process (default: 1000)\n" - "======================================================\n", - argv[0], argv[0], argv[0]); - if (LLVMFuzzerInitialize) - LLVMFuzzerInitialize(&argc, &argv); - // Do any other expensive one-time initialization here. - - maybe_duplicate_stderr(); - maybe_initialize_extra_stats(); - - __afl_manual_init(); - - int N = 1000; - if (argc == 2 && argv[1][0] == '-') - N = atoi(argv[1] + 1); - else if(argc == 2 && (N = atoi(argv[1])) > 0) - fprintf(stderr, "WARNING: using the deprecated call style `%s %d`\n", - argv[0], N); - else if (argc > 1) - return ExecuteFilesOnyByOne(argc, argv); - - assert(N > 0); - time_t unit_time_secs; - int num_runs = 0; - while (__afl_persistent_loop(N)) { - ssize_t n_read = read(0, AflInputBuf, kMaxAflInputSize); - if (n_read > 0) { - // Copy AflInputBuf into a separate buffer to let asan find buffer - // overflows. Don't use unique_ptr/etc to avoid extra dependencies. 
- uint8_t *copy = new uint8_t[n_read]; - memcpy(copy, AflInputBuf, n_read); - - struct timeval unit_start_time; - CHECK_ERROR(gettimeofday(&unit_start_time, NULL) == 0, - "Calling gettimeofday failed"); - - num_runs++; - LLVMFuzzerTestOneInput(copy, n_read); - - struct timeval unit_stop_time; - CHECK_ERROR(gettimeofday(&unit_stop_time, NULL) == 0, - "Calling gettimeofday failed"); - - // Update slowest_unit_time_secs if we see a new max. - unit_time_secs = unit_stop_time.tv_sec - unit_start_time.tv_sec; - if (slowest_unit_time_secs < unit_time_secs) - slowest_unit_time_secs = unit_time_secs; - - delete[] copy; - } - } - fprintf(stderr, "%s: successfully executed %d input(s)\n", argv[0], num_runs); -} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/build.sh llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/build.sh --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/build.sh 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/build.sh 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -#!/bin/bash -LIBFUZZER_SRC_DIR=$(dirname $0) -CXX="${CXX:-clang}" -for f in $LIBFUZZER_SRC_DIR/*.cpp; do - $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c & -done -wait -rm -f libFuzzer.a -ar ru libFuzzer.a Fuzzer*.o -rm -f Fuzzer*.o - diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/cxx.dict llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/cxx.dict --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/cxx.dict 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/cxx.dict 1970-01-01 00:00:00.000000000 +0000 @@ -1,122 +0,0 @@ -"++" -"--" -"<<" -">>" -"+=" -"-=" -"*=" -"/=" -">>=" -"<<=" -"&=" -"|=" -"^=" -"%=" -"!=" -"&&" -"||" -"==" -">=" -"<=" -"->" -"alignas" -"alignof" -"and" -"and_eq" -"asm" -"auto" -"bitand" -"bitor" -"bool" -"break" -"case" -"catch" -"char" -"char16_t" -"char32_t" -"class" -"compl" -"concept" -"const" -"constexpr" -"const_cast" -"continue" -"decltype" -"default" 
-"delete" -"do" -"double" -"dynamic_cast" -"else" -"enum" -"explicit" -"export" -"extern" -"false" -"float" -"for" -"friend" -"goto" -"if" -"inline" -"int" -"long" -"mutable" -"namespace" -"new" -"noexcept" -"not" -"not_eq" -"nullptr" -"operator" -"or" -"or_eq" -"private" -"protected" -"public" -"register" -"reinterpret_cast" -"requires" -"return" -"short" -"signed" -"sizeof" -"static" -"static_assert" -"static_cast" -"struct" -"switch" -"template" -"this" -"thread_local" -"throw" -"true" -"try" -"typedef" -"typeid" -"typename" -"union" -"unsigned" -"using" -"virtual" -"void" -"volatile" -"wchar_t" -"while" -"xor" -"xor_eq" -"if" -"elif" -"else" -"endif" -"defined" -"ifdef" -"ifndef" -"define" -"undef" -"include" -"line" -"error" -"pragma" -"override" -"final" diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerClangCounters.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerClangCounters.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerClangCounters.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerClangCounters.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ -//===- FuzzerExtraCounters.cpp - Extra coverage counters ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Coverage counters from Clang's SourceBasedCodeCoverage. -//===----------------------------------------------------------------------===// - -// Support for SourceBasedCodeCoverage is experimental: -// * Works only for the main binary, not DSOs yet. -// * Works only on Linux. -// * Does not implement print_pcs/print_coverage yet. -// * Is not fully evaluated for performance and sensitivity. 
-// We expect large performance drop due to 64-bit counters, -// and *maybe* better sensitivity due to more fine-grained counters. -// Preliminary comparison on a single benchmark (RE2) shows -// a bit worse sensitivity though. - -#include "FuzzerDefs.h" - -#if LIBFUZZER_LINUX -__attribute__((weak)) extern uint64_t __start___llvm_prf_cnts; -__attribute__((weak)) extern uint64_t __stop___llvm_prf_cnts; -namespace fuzzer { -uint64_t *ClangCountersBegin() { return &__start___llvm_prf_cnts; } -uint64_t *ClangCountersEnd() { return &__stop___llvm_prf_cnts; } -} // namespace fuzzer -#else -// TODO: Implement on Mac (if the data shows it's worth it). -//__attribute__((visibility("hidden"))) -//extern uint64_t CountersStart __asm("section$start$__DATA$__llvm_prf_cnts"); -//__attribute__((visibility("hidden"))) -//extern uint64_t CountersEnd __asm("section$end$__DATA$__llvm_prf_cnts"); -namespace fuzzer { -uint64_t *ClangCountersBegin() { return nullptr; } -uint64_t *ClangCountersEnd() { return nullptr; } -} // namespace fuzzer -#endif - -namespace fuzzer { -ATTRIBUTE_NO_SANITIZE_ALL -void ClearClangCounters() { // hand-written memset, don't asan-ify. - for (auto P = ClangCountersBegin(); P < ClangCountersEnd(); P++) - *P = 0; -} -} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerCorpus.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerCorpus.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerCorpus.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerCorpus.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,275 +0,0 @@ -//===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// fuzzer::InputCorpus -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_CORPUS -#define LLVM_FUZZER_CORPUS - -#include "FuzzerDefs.h" -#include "FuzzerIO.h" -#include "FuzzerRandom.h" -#include "FuzzerSHA1.h" -#include "FuzzerTracePC.h" -#include -#include -#include -#include - -namespace fuzzer { - -struct InputInfo { - Unit U; // The actual input data. - uint8_t Sha1[kSHA1NumBytes]; // Checksum. - // Number of features that this input has and no smaller input has. - size_t NumFeatures = 0; - size_t Tmp = 0; // Used by ValidateFeatureSet. - // Stats. - size_t NumExecutedMutations = 0; - size_t NumSuccessfullMutations = 0; - bool MayDeleteFile = false; - bool Reduced = false; - std::vector UniqFeatureSet; -}; - -class InputCorpus { - static const size_t kFeatureSetSize = 1 << 21; - public: - InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) { - memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); - memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); - } - ~InputCorpus() { - for (auto II : Inputs) - delete II; - } - size_t size() const { return Inputs.size(); } - size_t SizeInBytes() const { - size_t Res = 0; - for (auto II : Inputs) - Res += II->U.size(); - return Res; - } - size_t NumActiveUnits() const { - size_t Res = 0; - for (auto II : Inputs) - Res += !II->U.empty(); - return Res; - } - size_t MaxInputSize() const { - size_t Res = 0; - for (auto II : Inputs) - Res = std::max(Res, II->U.size()); - return Res; - } - bool empty() const { return Inputs.empty(); } - const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } - void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, - const std::vector &FeatureSet) { - assert(!U.empty()); - if (FeatureDebug) - Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); - 
Inputs.push_back(new InputInfo()); - InputInfo &II = *Inputs.back(); - II.U = U; - II.NumFeatures = NumFeatures; - II.MayDeleteFile = MayDeleteFile; - II.UniqFeatureSet = FeatureSet; - std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end()); - ComputeSHA1(U.data(), U.size(), II.Sha1); - Hashes.insert(Sha1ToString(II.Sha1)); - UpdateCorpusDistribution(); - PrintCorpus(); - // ValidateFeatureSet(); - } - - // Debug-only - void PrintUnit(const Unit &U) { - if (!FeatureDebug) return; - for (uint8_t C : U) { - if (C != 'F' && C != 'U' && C != 'Z') - C = '.'; - Printf("%c", C); - } - } - - // Debug-only - void PrintFeatureSet(const std::vector &FeatureSet) { - if (!FeatureDebug) return; - Printf("{"); - for (uint32_t Feature: FeatureSet) - Printf("%u,", Feature); - Printf("}"); - } - - // Debug-only - void PrintCorpus() { - if (!FeatureDebug) return; - Printf("======= CORPUS:\n"); - int i = 0; - for (auto II : Inputs) { - if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) { - Printf("[%2d] ", i); - Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size()); - PrintUnit(II->U); - Printf(" "); - PrintFeatureSet(II->UniqFeatureSet); - Printf("\n"); - } - i++; - } - } - - void Replace(InputInfo *II, const Unit &U) { - assert(II->U.size() > U.size()); - Hashes.erase(Sha1ToString(II->Sha1)); - DeleteFile(*II); - ComputeSHA1(U.data(), U.size(), II->Sha1); - Hashes.insert(Sha1ToString(II->Sha1)); - II->U = U; - II->Reduced = true; - } - - bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } - bool HasUnit(const std::string &H) { return Hashes.count(H); } - InputInfo &ChooseUnitToMutate(Random &Rand) { - InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; - assert(!II.U.empty()); - return II; - }; - - // Returns an index of random unit from the corpus to mutate. - // Hypothesis: units added to the corpus last are more likely to be - // interesting. This function gives more weight to the more recent units. 
- size_t ChooseUnitIdxToMutate(Random &Rand) { - size_t Idx = static_cast(CorpusDistribution(Rand)); - assert(Idx < Inputs.size()); - return Idx; - } - - void PrintStats() { - for (size_t i = 0; i < Inputs.size(); i++) { - const auto &II = *Inputs[i]; - Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i, - Sha1ToString(II.Sha1).c_str(), II.U.size(), - II.NumExecutedMutations, II.NumSuccessfullMutations); - } - } - - void PrintFeatureSet() { - for (size_t i = 0; i < kFeatureSetSize; i++) { - if(size_t Sz = GetFeature(i)) - Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz); - } - Printf("\n\t"); - for (size_t i = 0; i < Inputs.size(); i++) - if (size_t N = Inputs[i]->NumFeatures) - Printf(" %zd=>%zd ", i, N); - Printf("\n"); - } - - void DeleteFile(const InputInfo &II) { - if (!OutputCorpus.empty() && II.MayDeleteFile) - RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); - } - - void DeleteInput(size_t Idx) { - InputInfo &II = *Inputs[Idx]; - DeleteFile(II); - Unit().swap(II.U); - if (FeatureDebug) - Printf("EVICTED %zd\n", Idx); - } - - bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { - assert(NewSize); - Idx = Idx % kFeatureSetSize; - uint32_t OldSize = GetFeature(Idx); - if (OldSize == 0 || (Shrink && OldSize > NewSize)) { - if (OldSize > 0) { - size_t OldIdx = SmallestElementPerFeature[Idx]; - InputInfo &II = *Inputs[OldIdx]; - assert(II.NumFeatures > 0); - II.NumFeatures--; - if (II.NumFeatures == 0) - DeleteInput(OldIdx); - } else { - NumAddedFeatures++; - } - NumUpdatedFeatures++; - if (FeatureDebug) - Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize); - SmallestElementPerFeature[Idx] = Inputs.size(); - InputSizesPerFeature[Idx] = NewSize; - return true; - } - return false; - } - - size_t NumFeatures() const { return NumAddedFeatures; } - size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } - - void ResetFeatureSet() { - assert(Inputs.empty()); - memset(InputSizesPerFeature, 0, 
sizeof(InputSizesPerFeature)); - memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); - } - -private: - - static const bool FeatureDebug = false; - - size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } - - void ValidateFeatureSet() { - if (FeatureDebug) - PrintFeatureSet(); - for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) - if (GetFeature(Idx)) - Inputs[SmallestElementPerFeature[Idx]]->Tmp++; - for (auto II: Inputs) { - if (II->Tmp != II->NumFeatures) - Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures); - assert(II->Tmp == II->NumFeatures); - II->Tmp = 0; - } - } - - // Updates the probability distribution for the units in the corpus. - // Must be called whenever the corpus or unit weights are changed. - void UpdateCorpusDistribution() { - size_t N = Inputs.size(); - assert(N); - Intervals.resize(N + 1); - Weights.resize(N); - std::iota(Intervals.begin(), Intervals.end(), 0); - for (size_t i = 0; i < N; i++) - Weights[i] = Inputs[i]->NumFeatures * (i + 1); - CorpusDistribution = std::piecewise_constant_distribution( - Intervals.begin(), Intervals.end(), Weights.begin()); - } - std::piecewise_constant_distribution CorpusDistribution; - - std::vector Intervals; - std::vector Weights; - - std::unordered_set Hashes; - std::vector Inputs; - - size_t NumAddedFeatures = 0; - size_t NumUpdatedFeatures = 0; - uint32_t InputSizesPerFeature[kFeatureSetSize]; - uint32_t SmallestElementPerFeature[kFeatureSetSize]; - - std::string OutputCorpus; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_CORPUS diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerCrossOver.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerCrossOver.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerCrossOver.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerCrossOver.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -//===- FuzzerCrossOver.cpp - Cross over two test 
inputs -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Cross over test inputs. -//===----------------------------------------------------------------------===// - -#include "FuzzerDefs.h" -#include "FuzzerMutate.h" -#include "FuzzerRandom.h" -#include - -namespace fuzzer { - -// Cross Data1 and Data2, store the result (up to MaxOutSize bytes) in Out. -size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize) { - assert(Size1 || Size2); - MaxOutSize = Rand(MaxOutSize) + 1; - size_t OutPos = 0; - size_t Pos1 = 0; - size_t Pos2 = 0; - size_t *InPos = &Pos1; - size_t InSize = Size1; - const uint8_t *Data = Data1; - bool CurrentlyUsingFirstData = true; - while (OutPos < MaxOutSize && (Pos1 < Size1 || Pos2 < Size2)) { - // Merge a part of Data into Out. - size_t OutSizeLeft = MaxOutSize - OutPos; - if (*InPos < InSize) { - size_t InSizeLeft = InSize - *InPos; - size_t MaxExtraSize = std::min(OutSizeLeft, InSizeLeft); - size_t ExtraSize = Rand(MaxExtraSize) + 1; - memcpy(Out + OutPos, Data + *InPos, ExtraSize); - OutPos += ExtraSize; - (*InPos) += ExtraSize; - } - // Use the other input data on the next iteration. - InPos = CurrentlyUsingFirstData ? &Pos2 : &Pos1; - InSize = CurrentlyUsingFirstData ? Size2 : Size1; - Data = CurrentlyUsingFirstData ? 
Data2 : Data1; - CurrentlyUsingFirstData = !CurrentlyUsingFirstData; - } - return OutPos; -} - -} // namespace fuzzer diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerDefs.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerDefs.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerDefs.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerDefs.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ -//===- FuzzerDefs.h - Internal header for the Fuzzer ------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Basic definitions. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_DEFS_H -#define LLVM_FUZZER_DEFS_H - -#include -#include -#include -#include -#include -#include - -// Platform detection. -#ifdef __linux__ -#define LIBFUZZER_APPLE 0 -#define LIBFUZZER_LINUX 1 -#define LIBFUZZER_WINDOWS 0 -#elif __APPLE__ -#define LIBFUZZER_APPLE 1 -#define LIBFUZZER_LINUX 0 -#define LIBFUZZER_WINDOWS 0 -#elif _WIN32 -#define LIBFUZZER_APPLE 0 -#define LIBFUZZER_LINUX 0 -#define LIBFUZZER_WINDOWS 1 -#else -#error "Support for your platform has not been implemented" -#endif - -#ifndef __has_attribute -# define __has_attribute(x) 0 -#endif - -#define LIBFUZZER_POSIX LIBFUZZER_APPLE || LIBFUZZER_LINUX - -#ifdef __x86_64 -# if __has_attribute(target) -# define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) -# else -# define ATTRIBUTE_TARGET_POPCNT -# endif -#else -# define ATTRIBUTE_TARGET_POPCNT -#endif - - -#ifdef __clang__ // avoid gcc warning. 
-# if __has_attribute(no_sanitize) -# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) -# else -# define ATTRIBUTE_NO_SANITIZE_MEMORY -# endif -# define ALWAYS_INLINE __attribute__((always_inline)) -#else -# define ATTRIBUTE_NO_SANITIZE_MEMORY -# define ALWAYS_INLINE -#endif // __clang__ - -#define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address)) - -#if defined(__has_feature) -# if __has_feature(address_sanitizer) -# define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_ADDRESS -# elif __has_feature(memory_sanitizer) -# define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_MEMORY -# else -# define ATTRIBUTE_NO_SANITIZE_ALL -# endif -#else -# define ATTRIBUTE_NO_SANITIZE_ALL -#endif - -#if LIBFUZZER_WINDOWS -#define ATTRIBUTE_INTERFACE __declspec(dllexport) -#else -#define ATTRIBUTE_INTERFACE __attribute__((visibility("default"))) -#endif - -namespace fuzzer { - -template T Min(T a, T b) { return a < b ? a : b; } -template T Max(T a, T b) { return a > b ? a : b; } - -class Random; -class Dictionary; -class DictionaryEntry; -class MutationDispatcher; -struct FuzzingOptions; -class InputCorpus; -struct InputInfo; -struct ExternalFunctions; - -// Global interface to functions that may or may not be available. 
-extern ExternalFunctions *EF; - -typedef std::vector Unit; -typedef std::vector UnitVector; -typedef int (*UserCallback)(const uint8_t *Data, size_t Size); - -int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); - -struct ScopedDoingMyOwnMemOrStr { - ScopedDoingMyOwnMemOrStr() { DoingMyOwnMemOrStr++; } - ~ScopedDoingMyOwnMemOrStr() { DoingMyOwnMemOrStr--; } - static int DoingMyOwnMemOrStr; -}; - -inline uint8_t Bswap(uint8_t x) { return x; } -inline uint16_t Bswap(uint16_t x) { return __builtin_bswap16(x); } -inline uint32_t Bswap(uint32_t x) { return __builtin_bswap32(x); } -inline uint64_t Bswap(uint64_t x) { return __builtin_bswap64(x); } - -uint8_t *ExtraCountersBegin(); -uint8_t *ExtraCountersEnd(); -void ClearExtraCounters(); - -uint64_t *ClangCountersBegin(); -uint64_t *ClangCountersEnd(); -void ClearClangCounters(); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_DEFS_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerDictionary.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerDictionary.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerDictionary.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerDictionary.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,127 +0,0 @@ -//===- FuzzerDictionary.h - Internal header for the Fuzzer ------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::Dictionary -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_DICTIONARY_H -#define LLVM_FUZZER_DICTIONARY_H - -#include "FuzzerDefs.h" -#include "FuzzerIO.h" -#include "FuzzerUtil.h" -#include -#include - -namespace fuzzer { -// A simple POD sized array of bytes. 
-template class FixedWord { -public: - static const size_t kMaxSize = kMaxSizeT; - FixedWord() {} - FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); } - - void Set(const uint8_t *B, uint8_t S) { - assert(S <= kMaxSize); - memcpy(Data, B, S); - Size = S; - } - - bool operator==(const FixedWord &w) const { - ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str; - return Size == w.Size && 0 == memcmp(Data, w.Data, Size); - } - - bool operator<(const FixedWord &w) const { - ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str; - if (Size != w.Size) - return Size < w.Size; - return memcmp(Data, w.Data, Size) < 0; - } - - static size_t GetMaxSize() { return kMaxSize; } - const uint8_t *data() const { return Data; } - uint8_t size() const { return Size; } - -private: - uint8_t Size = 0; - uint8_t Data[kMaxSize]; -}; - -typedef FixedWord<64> Word; - -class DictionaryEntry { - public: - DictionaryEntry() {} - DictionaryEntry(Word W) : W(W) {} - DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} - const Word &GetW() const { return W; } - - bool HasPositionHint() const { return PositionHint != std::numeric_limits::max(); } - size_t GetPositionHint() const { - assert(HasPositionHint()); - return PositionHint; - } - void IncUseCount() { UseCount++; } - void IncSuccessCount() { SuccessCount++; } - size_t GetUseCount() const { return UseCount; } - size_t GetSuccessCount() const {return SuccessCount; } - - void Print(const char *PrintAfter = "\n") { - PrintASCII(W.data(), W.size()); - if (HasPositionHint()) - Printf("@%zd", GetPositionHint()); - Printf("%s", PrintAfter); - } - -private: - Word W; - size_t PositionHint = std::numeric_limits::max(); - size_t UseCount = 0; - size_t SuccessCount = 0; -}; - -class Dictionary { - public: - static const size_t kMaxDictSize = 1 << 14; - - bool ContainsWord(const Word &W) const { - return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { - return DE.GetW() == W; - }); - } - const 
DictionaryEntry *begin() const { return &DE[0]; } - const DictionaryEntry *end() const { return begin() + Size; } - DictionaryEntry & operator[] (size_t Idx) { - assert(Idx < Size); - return DE[Idx]; - } - void push_back(DictionaryEntry DE) { - if (Size < kMaxDictSize) - this->DE[Size++] = DE; - } - void clear() { Size = 0; } - bool empty() const { return Size == 0; } - size_t size() const { return Size; } - -private: - DictionaryEntry DE[kMaxDictSize]; - size_t Size = 0; -}; - -// Parses one dictionary entry. -// If successfull, write the enty to Unit and returns true, -// otherwise returns false. -bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); -// Parses the dictionary file, fills Units, returns true iff all lines -// were parsed succesfully. -bool ParseDictionaryFile(const std::string &Text, std::vector *Units); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_DICTIONARY_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerDriver.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerDriver.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerDriver.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerDriver.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,764 +0,0 @@ -//===- FuzzerDriver.cpp - FuzzerDriver function and flags -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// FuzzerDriver and flag parsing. 
-//===----------------------------------------------------------------------===// - -#include "FuzzerCorpus.h" -#include "FuzzerIO.h" -#include "FuzzerInterface.h" -#include "FuzzerInternal.h" -#include "FuzzerMutate.h" -#include "FuzzerRandom.h" -#include "FuzzerShmem.h" -#include "FuzzerTracePC.h" -#include -#include -#include -#include -#include -#include -#include -#include - -// This function should be present in the libFuzzer so that the client -// binary can test for its existence. -extern "C" __attribute__((used)) void __libfuzzer_is_present() {} - -namespace fuzzer { - -// Program arguments. -struct FlagDescription { - const char *Name; - const char *Description; - int Default; - int *IntFlag; - const char **StrFlag; - unsigned int *UIntFlag; -}; - -struct { -#define FUZZER_DEPRECATED_FLAG(Name) -#define FUZZER_FLAG_INT(Name, Default, Description) int Name; -#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) unsigned int Name; -#define FUZZER_FLAG_STRING(Name, Description) const char *Name; -#include "FuzzerFlags.def" -#undef FUZZER_DEPRECATED_FLAG -#undef FUZZER_FLAG_INT -#undef FUZZER_FLAG_UNSIGNED -#undef FUZZER_FLAG_STRING -} Flags; - -static const FlagDescription FlagDescriptions [] { -#define FUZZER_DEPRECATED_FLAG(Name) \ - {#Name, "Deprecated; don't use", 0, nullptr, nullptr, nullptr}, -#define FUZZER_FLAG_INT(Name, Default, Description) \ - {#Name, Description, Default, &Flags.Name, nullptr, nullptr}, -#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) \ - {#Name, Description, static_cast(Default), \ - nullptr, nullptr, &Flags.Name}, -#define FUZZER_FLAG_STRING(Name, Description) \ - {#Name, Description, 0, nullptr, &Flags.Name, nullptr}, -#include "FuzzerFlags.def" -#undef FUZZER_DEPRECATED_FLAG -#undef FUZZER_FLAG_INT -#undef FUZZER_FLAG_UNSIGNED -#undef FUZZER_FLAG_STRING -}; - -static const size_t kNumFlags = - sizeof(FlagDescriptions) / sizeof(FlagDescriptions[0]); - -static std::vector *Inputs; -static std::string *ProgName; - 
-static void PrintHelp() { - Printf("Usage:\n"); - auto Prog = ProgName->c_str(); - Printf("\nTo run fuzzing pass 0 or more directories.\n"); - Printf("%s [-flag1=val1 [-flag2=val2 ...] ] [dir1 [dir2 ...] ]\n", Prog); - - Printf("\nTo run individual tests without fuzzing pass 1 or more files:\n"); - Printf("%s [-flag1=val1 [-flag2=val2 ...] ] file1 [file2 ...]\n", Prog); - - Printf("\nFlags: (strictly in form -flag=value)\n"); - size_t MaxFlagLen = 0; - for (size_t F = 0; F < kNumFlags; F++) - MaxFlagLen = std::max(strlen(FlagDescriptions[F].Name), MaxFlagLen); - - for (size_t F = 0; F < kNumFlags; F++) { - const auto &D = FlagDescriptions[F]; - if (strstr(D.Description, "internal flag") == D.Description) continue; - Printf(" %s", D.Name); - for (size_t i = 0, n = MaxFlagLen - strlen(D.Name); i < n; i++) - Printf(" "); - Printf("\t"); - Printf("%d\t%s\n", D.Default, D.Description); - } - Printf("\nFlags starting with '--' will be ignored and " - "will be passed verbatim to subprocesses.\n"); -} - -static const char *FlagValue(const char *Param, const char *Name) { - size_t Len = strlen(Name); - if (Param[0] == '-' && strstr(Param + 1, Name) == Param + 1 && - Param[Len + 1] == '=') - return &Param[Len + 2]; - return nullptr; -} - -// Avoid calling stol as it triggers a bug in clang/glibc build. 
-static long MyStol(const char *Str) { - long Res = 0; - long Sign = 1; - if (*Str == '-') { - Str++; - Sign = -1; - } - for (size_t i = 0; Str[i]; i++) { - char Ch = Str[i]; - if (Ch < '0' || Ch > '9') - return Res; - Res = Res * 10 + (Ch - '0'); - } - return Res * Sign; -} - -static bool ParseOneFlag(const char *Param) { - if (Param[0] != '-') return false; - if (Param[1] == '-') { - static bool PrintedWarning = false; - if (!PrintedWarning) { - PrintedWarning = true; - Printf("INFO: libFuzzer ignores flags that start with '--'\n"); - } - for (size_t F = 0; F < kNumFlags; F++) - if (FlagValue(Param + 1, FlagDescriptions[F].Name)) - Printf("WARNING: did you mean '%s' (single dash)?\n", Param + 1); - return true; - } - for (size_t F = 0; F < kNumFlags; F++) { - const char *Name = FlagDescriptions[F].Name; - const char *Str = FlagValue(Param, Name); - if (Str) { - if (FlagDescriptions[F].IntFlag) { - int Val = MyStol(Str); - *FlagDescriptions[F].IntFlag = Val; - if (Flags.verbosity >= 2) - Printf("Flag: %s %d\n", Name, Val); - return true; - } else if (FlagDescriptions[F].UIntFlag) { - unsigned int Val = std::stoul(Str); - *FlagDescriptions[F].UIntFlag = Val; - if (Flags.verbosity >= 2) - Printf("Flag: %s %u\n", Name, Val); - return true; - } else if (FlagDescriptions[F].StrFlag) { - *FlagDescriptions[F].StrFlag = Str; - if (Flags.verbosity >= 2) - Printf("Flag: %s %s\n", Name, Str); - return true; - } else { // Deprecated flag. - Printf("Flag: %s: deprecated, don't use\n", Name); - return true; - } - } - } - Printf("\n\nWARNING: unrecognized flag '%s'; " - "use -help=1 to list all flags\n\n", Param); - return true; -} - -// We don't use any library to minimize dependencies. 
-static void ParseFlags(const std::vector &Args) { - for (size_t F = 0; F < kNumFlags; F++) { - if (FlagDescriptions[F].IntFlag) - *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; - if (FlagDescriptions[F].UIntFlag) - *FlagDescriptions[F].UIntFlag = - static_cast(FlagDescriptions[F].Default); - if (FlagDescriptions[F].StrFlag) - *FlagDescriptions[F].StrFlag = nullptr; - } - Inputs = new std::vector; - for (size_t A = 1; A < Args.size(); A++) { - if (ParseOneFlag(Args[A].c_str())) { - if (Flags.ignore_remaining_args) - break; - continue; - } - Inputs->push_back(Args[A]); - } -} - -static std::mutex Mu; - -static void PulseThread() { - while (true) { - SleepSeconds(600); - std::lock_guard Lock(Mu); - Printf("pulse...\n"); - } -} - -static void WorkerThread(const std::string &Cmd, std::atomic *Counter, - unsigned NumJobs, std::atomic *HasErrors) { - while (true) { - unsigned C = (*Counter)++; - if (C >= NumJobs) break; - std::string Log = "fuzz-" + std::to_string(C) + ".log"; - std::string ToRun = Cmd + " > " + Log + " 2>&1\n"; - if (Flags.verbosity) - Printf("%s", ToRun.c_str()); - int ExitCode = ExecuteCommand(ToRun); - if (ExitCode != 0) - *HasErrors = true; - std::lock_guard Lock(Mu); - Printf("================== Job %u exited with exit code %d ============\n", - C, ExitCode); - fuzzer::CopyFileToErr(Log); - } -} - -std::string CloneArgsWithoutX(const std::vector &Args, - const char *X1, const char *X2) { - std::string Cmd; - for (auto &S : Args) { - if (FlagValue(S.c_str(), X1) || FlagValue(S.c_str(), X2)) - continue; - Cmd += S + " "; - } - return Cmd; -} - -static int RunInMultipleProcesses(const std::vector &Args, - unsigned NumWorkers, unsigned NumJobs) { - std::atomic Counter(0); - std::atomic HasErrors(false); - std::string Cmd = CloneArgsWithoutX(Args, "jobs", "workers"); - std::vector V; - std::thread Pulse(PulseThread); - Pulse.detach(); - for (unsigned i = 0; i < NumWorkers; i++) - V.push_back(std::thread(WorkerThread, Cmd, &Counter, NumJobs, 
&HasErrors)); - for (auto &T : V) - T.join(); - return HasErrors ? 1 : 0; -} - -static void RssThread(Fuzzer *F, size_t RssLimitMb) { - while (true) { - SleepSeconds(1); - size_t Peak = GetPeakRSSMb(); - if (Peak > RssLimitMb) - F->RssLimitCallback(); - } -} - -static void StartRssThread(Fuzzer *F, size_t RssLimitMb) { - if (!RssLimitMb) return; - std::thread T(RssThread, F, RssLimitMb); - T.detach(); -} - -int RunOneTest(Fuzzer *F, const char *InputFilePath, size_t MaxLen) { - Unit U = FileToVector(InputFilePath); - if (MaxLen && MaxLen < U.size()) - U.resize(MaxLen); - F->ExecuteCallback(U.data(), U.size()); - F->TryDetectingAMemoryLeak(U.data(), U.size(), true); - return 0; -} - -static bool AllInputsAreFiles() { - if (Inputs->empty()) return false; - for (auto &Path : *Inputs) - if (!IsFile(Path)) - return false; - return true; -} - -static std::string GetDedupTokenFromFile(const std::string &Path) { - auto S = FileToString(Path); - auto Beg = S.find("DEDUP_TOKEN:"); - if (Beg == std::string::npos) - return ""; - auto End = S.find('\n', Beg); - if (End == std::string::npos) - return ""; - return S.substr(Beg, End - Beg); -} - -int CleanseCrashInput(const std::vector &Args, - const FuzzingOptions &Options) { - if (Inputs->size() != 1 || !Flags.exact_artifact_path) { - Printf("ERROR: -cleanse_crash should be given one input file and" - " -exact_artifact_path\n"); - exit(1); - } - std::string InputFilePath = Inputs->at(0); - std::string OutputFilePath = Flags.exact_artifact_path; - std::string BaseCmd = - CloneArgsWithoutX(Args, "cleanse_crash", "cleanse_crash"); - - auto InputPos = BaseCmd.find(" " + InputFilePath + " "); - assert(InputPos != std::string::npos); - BaseCmd.erase(InputPos, InputFilePath.size() + 1); - - auto LogFilePath = DirPlusFile( - TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".txt"); - auto TmpFilePath = DirPlusFile( - TmpDir(), "libFuzzerTemp." 
+ std::to_string(GetPid()) + ".repro"); - auto LogFileRedirect = " > " + LogFilePath + " 2>&1 "; - - auto Cmd = BaseCmd + " " + TmpFilePath + LogFileRedirect; - - std::string CurrentFilePath = InputFilePath; - auto U = FileToVector(CurrentFilePath); - size_t Size = U.size(); - - const std::vector ReplacementBytes = {' ', 0xff}; - for (int NumAttempts = 0; NumAttempts < 5; NumAttempts++) { - bool Changed = false; - for (size_t Idx = 0; Idx < Size; Idx++) { - Printf("CLEANSE[%d]: Trying to replace byte %zd of %zd\n", NumAttempts, - Idx, Size); - uint8_t OriginalByte = U[Idx]; - if (ReplacementBytes.end() != std::find(ReplacementBytes.begin(), - ReplacementBytes.end(), - OriginalByte)) - continue; - for (auto NewByte : ReplacementBytes) { - U[Idx] = NewByte; - WriteToFile(U, TmpFilePath); - auto ExitCode = ExecuteCommand(Cmd); - RemoveFile(TmpFilePath); - if (!ExitCode) { - U[Idx] = OriginalByte; - } else { - Changed = true; - Printf("CLEANSE: Replaced byte %zd with 0x%x\n", Idx, NewByte); - WriteToFile(U, OutputFilePath); - break; - } - } - } - if (!Changed) break; - } - RemoveFile(LogFilePath); - return 0; -} - -int MinimizeCrashInput(const std::vector &Args, - const FuzzingOptions &Options) { - if (Inputs->size() != 1) { - Printf("ERROR: -minimize_crash should be given one input file\n"); - exit(1); - } - std::string InputFilePath = Inputs->at(0); - auto BaseCmd = SplitBefore( - "-ignore_remaining_args=1", - CloneArgsWithoutX(Args, "minimize_crash", "exact_artifact_path")); - auto InputPos = BaseCmd.first.find(" " + InputFilePath + " "); - assert(InputPos != std::string::npos); - BaseCmd.first.erase(InputPos, InputFilePath.size() + 1); - if (Flags.runs <= 0 && Flags.max_total_time == 0) { - Printf("INFO: you need to specify -runs=N or " - "-max_total_time=N with -minimize_crash=1\n" - "INFO: defaulting to -max_total_time=600\n"); - BaseCmd.first += " -max_total_time=600"; - } - - auto LogFilePath = DirPlusFile( - TmpDir(), "libFuzzerTemp." 
+ std::to_string(GetPid()) + ".txt"); - auto LogFileRedirect = " > " + LogFilePath + " 2>&1 "; - - std::string CurrentFilePath = InputFilePath; - while (true) { - Unit U = FileToVector(CurrentFilePath); - Printf("CRASH_MIN: minimizing crash input: '%s' (%zd bytes)\n", - CurrentFilePath.c_str(), U.size()); - - auto Cmd = BaseCmd.first + " " + CurrentFilePath + LogFileRedirect + " " + - BaseCmd.second; - - Printf("CRASH_MIN: executing: %s\n", Cmd.c_str()); - int ExitCode = ExecuteCommand(Cmd); - if (ExitCode == 0) { - Printf("ERROR: the input %s did not crash\n", CurrentFilePath.c_str()); - exit(1); - } - Printf("CRASH_MIN: '%s' (%zd bytes) caused a crash. Will try to minimize " - "it further\n", - CurrentFilePath.c_str(), U.size()); - auto DedupToken1 = GetDedupTokenFromFile(LogFilePath); - if (!DedupToken1.empty()) - Printf("CRASH_MIN: DedupToken1: %s\n", DedupToken1.c_str()); - - std::string ArtifactPath = - Flags.exact_artifact_path - ? Flags.exact_artifact_path - : Options.ArtifactPrefix + "minimized-from-" + Hash(U); - Cmd += " -minimize_crash_internal_step=1 -exact_artifact_path=" + - ArtifactPath; - Printf("CRASH_MIN: executing: %s\n", Cmd.c_str()); - ExitCode = ExecuteCommand(Cmd); - CopyFileToErr(LogFilePath); - if (ExitCode == 0) { - if (Flags.exact_artifact_path) { - CurrentFilePath = Flags.exact_artifact_path; - WriteToFile(U, CurrentFilePath); - } - Printf("CRASH_MIN: failed to minimize beyond %s (%d bytes), exiting\n", - CurrentFilePath.c_str(), U.size()); - break; - } - auto DedupToken2 = GetDedupTokenFromFile(LogFilePath); - if (!DedupToken2.empty()) - Printf("CRASH_MIN: DedupToken2: %s\n", DedupToken2.c_str()); - - if (DedupToken1 != DedupToken2) { - if (Flags.exact_artifact_path) { - CurrentFilePath = Flags.exact_artifact_path; - WriteToFile(U, CurrentFilePath); - } - Printf("CRASH_MIN: mismatch in dedup tokens" - " (looks like a different bug). 
Won't minimize further\n"); - break; - } - - CurrentFilePath = ArtifactPath; - Printf("*********************************\n"); - } - RemoveFile(LogFilePath); - return 0; -} - -int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) { - assert(Inputs->size() == 1); - std::string InputFilePath = Inputs->at(0); - Unit U = FileToVector(InputFilePath); - Printf("INFO: Starting MinimizeCrashInputInternalStep: %zd\n", U.size()); - if (U.size() < 2) { - Printf("INFO: The input is small enough, exiting\n"); - exit(0); - } - F->SetMaxInputLen(U.size()); - F->SetMaxMutationLen(U.size() - 1); - F->MinimizeCrashLoop(U); - Printf("INFO: Done MinimizeCrashInputInternalStep, no crashes found\n"); - exit(0); - return 0; -} - -int AnalyzeDictionary(Fuzzer *F, const std::vector& Dict, - UnitVector& Corpus) { - Printf("Started dictionary minimization (up to %d tests)\n", - Dict.size() * Corpus.size() * 2); - - // Scores and usage count for each dictionary unit. - std::vector Scores(Dict.size()); - std::vector Usages(Dict.size()); - - std::vector InitialFeatures; - std::vector ModifiedFeatures; - for (auto &C : Corpus) { - // Get coverage for the testcase without modifications. - F->ExecuteCallback(C.data(), C.size()); - InitialFeatures.clear(); - TPC.CollectFeatures([&](size_t Feature) -> bool { - InitialFeatures.push_back(Feature); - return true; - }); - - for (size_t i = 0; i < Dict.size(); ++i) { - auto Data = C; - auto StartPos = std::search(Data.begin(), Data.end(), - Dict[i].begin(), Dict[i].end()); - // Skip dictionary unit, if the testcase does not contain it. - if (StartPos == Data.end()) - continue; - - ++Usages[i]; - while (StartPos != Data.end()) { - // Replace all occurrences of dictionary unit in the testcase. 
- auto EndPos = StartPos + Dict[i].size(); - for (auto It = StartPos; It != EndPos; ++It) - *It ^= 0xFF; - - StartPos = std::search(EndPos, Data.end(), - Dict[i].begin(), Dict[i].end()); - } - - // Get coverage for testcase with masked occurrences of dictionary unit. - F->ExecuteCallback(Data.data(), Data.size()); - ModifiedFeatures.clear(); - TPC.CollectFeatures([&](size_t Feature) -> bool { - ModifiedFeatures.push_back(Feature); - return true; - }); - - if (InitialFeatures == ModifiedFeatures) - --Scores[i]; - else - Scores[i] += 2; - } - } - - Printf("###### Useless dictionary elements. ######\n"); - for (size_t i = 0; i < Dict.size(); ++i) { - // Dictionary units with positive score are treated as useful ones. - if (Scores[i] > 0) - continue; - - Printf("\""); - PrintASCII(Dict[i].data(), Dict[i].size(), "\""); - Printf(" # Score: %d, Used: %d\n", Scores[i], Usages[i]); - } - Printf("###### End of useless dictionary elements. ######\n"); - return 0; -} - -int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { - using namespace fuzzer; - assert(argc && argv && "Argument pointers cannot be nullptr"); - std::string Argv0((*argv)[0]); - EF = new ExternalFunctions(); - if (EF->LLVMFuzzerInitialize) - EF->LLVMFuzzerInitialize(argc, argv); - const std::vector Args(*argv, *argv + *argc); - assert(!Args.empty()); - ProgName = new std::string(Args[0]); - if (Argv0 != *ProgName) { - Printf("ERROR: argv[0] has been modified in LLVMFuzzerInitialize\n"); - exit(1); - } - ParseFlags(Args); - if (Flags.help) { - PrintHelp(); - return 0; - } - - if (Flags.close_fd_mask & 2) - DupAndCloseStderr(); - if (Flags.close_fd_mask & 1) - CloseStdout(); - - if (Flags.jobs > 0 && Flags.workers == 0) { - Flags.workers = std::min(NumberOfCpuCores() / 2, Flags.jobs); - if (Flags.workers > 1) - Printf("Running %u workers\n", Flags.workers); - } - - if (Flags.workers > 0 && Flags.jobs > 0) - return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); - - const size_t 
kMaxSaneLen = 1 << 20; - const size_t kMinDefaultLen = 4096; - FuzzingOptions Options; - Options.Verbosity = Flags.verbosity; - Options.MaxLen = Flags.max_len; - Options.ExperimentalLenControl = Flags.experimental_len_control; - Options.UnitTimeoutSec = Flags.timeout; - Options.ErrorExitCode = Flags.error_exitcode; - Options.TimeoutExitCode = Flags.timeout_exitcode; - Options.MaxTotalTimeSec = Flags.max_total_time; - Options.DoCrossOver = Flags.cross_over; - Options.MutateDepth = Flags.mutate_depth; - Options.UseCounters = Flags.use_counters; - Options.UseIndirCalls = Flags.use_indir_calls; - Options.UseMemmem = Flags.use_memmem; - Options.UseCmp = Flags.use_cmp; - Options.UseValueProfile = Flags.use_value_profile; - Options.Shrink = Flags.shrink; - Options.ReduceInputs = Flags.reduce_inputs; - Options.ShuffleAtStartUp = Flags.shuffle; - Options.PreferSmall = Flags.prefer_small; - Options.ReloadIntervalSec = Flags.reload; - Options.OnlyASCII = Flags.only_ascii; - Options.DetectLeaks = Flags.detect_leaks; - Options.TraceMalloc = Flags.trace_malloc; - Options.RssLimitMb = Flags.rss_limit_mb; - if (Flags.runs >= 0) - Options.MaxNumberOfRuns = Flags.runs; - if (!Inputs->empty() && !Flags.minimize_crash_internal_step) - Options.OutputCorpus = (*Inputs)[0]; - Options.ReportSlowUnits = Flags.report_slow_units; - if (Flags.artifact_prefix) - Options.ArtifactPrefix = Flags.artifact_prefix; - if (Flags.exact_artifact_path) - Options.ExactArtifactPath = Flags.exact_artifact_path; - std::vector Dictionary; - if (Flags.dict) - if (!ParseDictionaryFile(FileToString(Flags.dict), &Dictionary)) - return 1; - if (Flags.verbosity > 0 && !Dictionary.empty()) - Printf("Dictionary: %zd entries\n", Dictionary.size()); - bool DoPlainRun = AllInputsAreFiles(); - Options.SaveArtifacts = - !DoPlainRun || Flags.minimize_crash_internal_step; - Options.PrintNewCovPcs = Flags.print_pcs; - Options.PrintFinalStats = Flags.print_final_stats; - Options.PrintCorpusStats = Flags.print_corpus_stats; - 
Options.PrintCoverage = Flags.print_coverage; - Options.DumpCoverage = Flags.dump_coverage; - if (Flags.exit_on_src_pos) - Options.ExitOnSrcPos = Flags.exit_on_src_pos; - if (Flags.exit_on_item) - Options.ExitOnItem = Flags.exit_on_item; - - unsigned Seed = Flags.seed; - // Initialize Seed. - if (Seed == 0) - Seed = - std::chrono::system_clock::now().time_since_epoch().count() + GetPid(); - if (Flags.verbosity) - Printf("INFO: Seed: %u\n", Seed); - - Random Rand(Seed); - auto *MD = new MutationDispatcher(Rand, Options); - auto *Corpus = new InputCorpus(Options.OutputCorpus); - auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); - - for (auto &U: Dictionary) - if (U.size() <= Word::GetMaxSize()) - MD->AddWordToManualDictionary(Word(U.data(), U.size())); - - StartRssThread(F, Flags.rss_limit_mb); - - Options.HandleAbrt = Flags.handle_abrt; - Options.HandleBus = Flags.handle_bus; - Options.HandleFpe = Flags.handle_fpe; - Options.HandleIll = Flags.handle_ill; - Options.HandleInt = Flags.handle_int; - Options.HandleSegv = Flags.handle_segv; - Options.HandleTerm = Flags.handle_term; - Options.HandleXfsz = Flags.handle_xfsz; - SetSignalHandler(Options); - - std::atexit(Fuzzer::StaticExitCallback); - - if (Flags.minimize_crash) - return MinimizeCrashInput(Args, Options); - - if (Flags.minimize_crash_internal_step) - return MinimizeCrashInputInternalStep(F, Corpus); - - if (Flags.cleanse_crash) - return CleanseCrashInput(Args, Options); - - if (auto Name = Flags.run_equivalence_server) { - SMR.Destroy(Name); - if (!SMR.Create(Name)) { - Printf("ERROR: can't create shared memory region\n"); - return 1; - } - Printf("INFO: EQUIVALENCE SERVER UP\n"); - while (true) { - SMR.WaitClient(); - size_t Size = SMR.ReadByteArraySize(); - SMR.WriteByteArray(nullptr, 0); - const Unit tmp(SMR.GetByteArray(), SMR.GetByteArray() + Size); - F->ExecuteCallback(tmp.data(), tmp.size()); - SMR.PostServer(); - } - return 0; - } - - if (auto Name = Flags.use_equivalence_server) { - if 
(!SMR.Open(Name)) { - Printf("ERROR: can't open shared memory region\n"); - return 1; - } - Printf("INFO: EQUIVALENCE CLIENT UP\n"); - } - - if (DoPlainRun) { - Options.SaveArtifacts = false; - int Runs = std::max(1, Flags.runs); - Printf("%s: Running %zd inputs %d time(s) each.\n", ProgName->c_str(), - Inputs->size(), Runs); - for (auto &Path : *Inputs) { - auto StartTime = system_clock::now(); - Printf("Running: %s\n", Path.c_str()); - for (int Iter = 0; Iter < Runs; Iter++) - RunOneTest(F, Path.c_str(), Options.MaxLen); - auto StopTime = system_clock::now(); - auto MS = duration_cast(StopTime - StartTime).count(); - Printf("Executed %s in %zd ms\n", Path.c_str(), (long)MS); - } - Printf("***\n" - "*** NOTE: fuzzing was not performed, you have only\n" - "*** executed the target code on a fixed set of inputs.\n" - "***\n"); - F->PrintFinalStats(); - exit(0); - } - - if (Flags.merge) { - if (Options.MaxLen == 0) - F->SetMaxInputLen(kMaxSaneLen); - if (Flags.merge_control_file) - F->CrashResistantMergeInternalStep(Flags.merge_control_file); - else - F->CrashResistantMerge(Args, *Inputs, - Flags.load_coverage_summary, - Flags.save_coverage_summary); - exit(0); - } - - size_t TemporaryMaxLen = Options.MaxLen ? 
Options.MaxLen : kMaxSaneLen; - - UnitVector InitialCorpus; - for (auto &Inp : *Inputs) { - Printf("Loading corpus dir: %s\n", Inp.c_str()); - ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr, - TemporaryMaxLen, /*ExitOnError=*/false); - } - - if (Flags.analyze_dict) { - if (Dictionary.empty() || Inputs->empty()) { - Printf("ERROR: can't analyze dict without dict and corpus provided\n"); - return 1; - } - if (AnalyzeDictionary(F, Dictionary, InitialCorpus)) { - Printf("Dictionary analysis failed\n"); - exit(1); - } - Printf("Dictionary analysis suceeded\n"); - exit(0); - } - - if (Options.MaxLen == 0) { - size_t MaxLen = 0; - for (auto &U : InitialCorpus) - MaxLen = std::max(U.size(), MaxLen); - F->SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen)); - } - - if (InitialCorpus.empty()) { - InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input. - if (Options.Verbosity) - Printf("INFO: A corpus is not provided, starting from an empty corpus\n"); - } - F->ShuffleAndMinimize(&InitialCorpus); - InitialCorpus.clear(); // Don't need this memory any more. - F->Loop(); - - if (Flags.verbosity) - Printf("Done %zd runs in %zd second(s)\n", F->getTotalNumberOfRuns(), - F->secondsSinceProcessStartUp()); - F->PrintFinalStats(); - - exit(0); // Don't let F destroy itself. -} - -// Storage for global ExternalFunctions object. 
-ExternalFunctions *EF = nullptr; - -} // namespace fuzzer diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctions.def llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctions.def --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctions.def 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctions.def 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -//===- FuzzerExtFunctions.def - External functions --------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This defines the external function pointers that -// ``fuzzer::ExternalFunctions`` should contain and try to initialize. The -// EXT_FUNC macro must be defined at the point of inclusion. The signature of -// the macro is: -// -// EXT_FUNC(, , , ) -//===----------------------------------------------------------------------===// - -// Optional user functions -EXT_FUNC(LLVMFuzzerInitialize, int, (int *argc, char ***argv), false); -EXT_FUNC(LLVMFuzzerCustomMutator, size_t, - (uint8_t * Data, size_t Size, size_t MaxSize, unsigned int Seed), - false); -EXT_FUNC(LLVMFuzzerCustomCrossOver, size_t, - (const uint8_t * Data1, size_t Size1, - const uint8_t * Data2, size_t Size2, - uint8_t * Out, size_t MaxOutSize, unsigned int Seed), - false); - -// Sanitizer functions -EXT_FUNC(__lsan_enable, void, (), false); -EXT_FUNC(__lsan_disable, void, (), false); -EXT_FUNC(__lsan_do_recoverable_leak_check, int, (), false); -EXT_FUNC(__sanitizer_install_malloc_and_free_hooks, int, - (void (*malloc_hook)(const volatile void *, size_t), - void (*free_hook)(const volatile void *)), - false); -EXT_FUNC(__sanitizer_print_memory_profile, int, (size_t, size_t), false); -EXT_FUNC(__sanitizer_print_stack_trace, void, (), 
true); -EXT_FUNC(__sanitizer_symbolize_pc, void, - (void *, const char *fmt, char *out_buf, size_t out_buf_size), false); -EXT_FUNC(__sanitizer_get_module_and_offset_for_pc, int, - (void *pc, char *module_path, - size_t module_path_len,void **pc_offset), false); -EXT_FUNC(__sanitizer_set_death_callback, void, (void (*)(void)), true); -EXT_FUNC(__sanitizer_set_report_fd, void, (void*), false); -EXT_FUNC(__sanitizer_dump_coverage, void, (const uintptr_t *, uintptr_t), - false); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctionsDlsym.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctionsDlsym.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctionsDlsym.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctionsDlsym.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -//===- FuzzerExtFunctionsDlsym.cpp - Interface to external functions ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Implementation for operating systems that support dlsym(). We only use it on -// Apple platforms for now. We don't use this approach on Linux because it -// requires that clients of LibFuzzer pass ``--export-dynamic`` to the linker. -// That is a complication we don't wish to expose to clients right now. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_APPLE - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include - -using namespace fuzzer; - -template -static T GetFnPtr(const char *FnName, bool WarnIfMissing) { - dlerror(); // Clear any previous errors. 
- void *Fn = dlsym(RTLD_DEFAULT, FnName); - if (Fn == nullptr) { - if (WarnIfMissing) { - const char *ErrorMsg = dlerror(); - Printf("WARNING: Failed to find function \"%s\".", FnName); - if (ErrorMsg) - Printf(" Reason %s.", ErrorMsg); - Printf("\n"); - } - } - return reinterpret_cast(Fn); -} - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - this->NAME = GetFnPtr(#NAME, WARN) - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_APPLE diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -//===- FuzzerExtFunctionsDlsymWin.cpp - Interface to external functions ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Implementation using dynamic loading for Windows. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include "Windows.h" - -// This must be included after Windows.h. 
-#include "Psapi.h" - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { - HMODULE Modules[1024]; - DWORD BytesNeeded; - HANDLE CurrentProcess = GetCurrentProcess(); - - if (!EnumProcessModules(CurrentProcess, Modules, sizeof(Modules), - &BytesNeeded)) { - Printf("EnumProcessModules failed (error: %d).\n", GetLastError()); - exit(1); - } - - if (sizeof(Modules) < BytesNeeded) { - Printf("Error: the array is not big enough to hold all loaded modules.\n"); - exit(1); - } - - for (size_t i = 0; i < (BytesNeeded / sizeof(HMODULE)); i++) - { - FARPROC Fn; -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - if (this->NAME == nullptr) { \ - Fn = GetProcAddress(Modules[i], #NAME); \ - if (Fn == nullptr) \ - Fn = GetProcAddress(Modules[i], #NAME "__dll"); \ - this->NAME = (decltype(ExternalFunctions::NAME)) Fn; \ - } -#include "FuzzerExtFunctions.def" -#undef EXT_FUNC - } - -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - if (this->NAME == nullptr && WARN) \ - Printf("WARNING: Failed to find function \"%s\".\n", #NAME); -#include "FuzzerExtFunctions.def" -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctions.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctions.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctions.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctions.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -//===- FuzzerExtFunctions.h - Interface to external functions ---*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Defines an interface to (possibly optional) functions. 
-//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_EXT_FUNCTIONS_H -#define LLVM_FUZZER_EXT_FUNCTIONS_H - -#include -#include - -namespace fuzzer { - -struct ExternalFunctions { - // Initialize function pointers. Functions that are not available will be set - // to nullptr. Do not call this constructor before ``main()`` has been - // entered. - ExternalFunctions(); - -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - RETURN_TYPE(*NAME) FUNC_SIG = nullptr - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -}; -} // namespace fuzzer - -#endif diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctionsWeakAlias.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctionsWeakAlias.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctionsWeakAlias.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctionsWeakAlias.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,56 +0,0 @@ -//===- FuzzerExtFunctionsWeakAlias.cpp - Interface to external functions --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Implementation using weak aliases. Works for Windows. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" - -using namespace fuzzer; - -extern "C" { -// Declare these symbols as weak to allow them to be optionally defined. 
-#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - RETURN_TYPE NAME##Def FUNC_SIG { \ - Printf("ERROR: Function \"%s\" not defined.\n", #NAME); \ - exit(1); \ - } \ - RETURN_TYPE NAME FUNC_SIG __attribute__((weak, alias(#NAME "Def"))); - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -template -static T *GetFnPtr(T *Fun, T *FunDef, const char *FnName, bool WarnIfMissing) { - if (Fun == FunDef) { - if (WarnIfMissing) - Printf("WARNING: Failed to find function \"%s\".\n", FnName); - return nullptr; - } - return Fun; -} - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - this->NAME = GetFnPtr(::NAME, ::NAME##Def, #NAME, WARN); - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,54 +0,0 @@ -//===- FuzzerExtFunctionsWeak.cpp - Interface to external functions -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Implementation for Linux. This relies on the linker's support for weak -// symbols. We don't use this approach on Apple platforms because it requires -// clients of LibFuzzer to pass ``-U _`` to the linker to allow -// weak symbols to be undefined. That is a complication we don't want to expose -// to clients right now. 
-//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_LINUX - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" - -extern "C" { -// Declare these symbols as weak to allow them to be optionally defined. -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - __attribute__((weak)) RETURN_TYPE NAME FUNC_SIG - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -using namespace fuzzer; - -static void CheckFnPtr(void *FnPtr, const char *FnName, bool WarnIfMissing) { - if (FnPtr == nullptr && WarnIfMissing) { - Printf("WARNING: Failed to find function \"%s\".\n", FnName); - } -} - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - this->NAME = ::NAME; \ - CheckFnPtr(reinterpret_cast(reinterpret_cast(::NAME)), \ - #NAME, WARN); - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_LINUX diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtraCounters.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtraCounters.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerExtraCounters.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerExtraCounters.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,41 +0,0 @@ -//===- FuzzerExtraCounters.cpp - Extra coverage counters ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Extra coverage counters defined by user code. 
-//===----------------------------------------------------------------------===// - -#include "FuzzerDefs.h" - -#if LIBFUZZER_LINUX -__attribute__((weak)) extern uint8_t __start___libfuzzer_extra_counters; -__attribute__((weak)) extern uint8_t __stop___libfuzzer_extra_counters; - -namespace fuzzer { -uint8_t *ExtraCountersBegin() { return &__start___libfuzzer_extra_counters; } -uint8_t *ExtraCountersEnd() { return &__stop___libfuzzer_extra_counters; } -ATTRIBUTE_NO_SANITIZE_ALL -void ClearExtraCounters() { // hand-written memset, don't asan-ify. - uintptr_t *Beg = reinterpret_cast(ExtraCountersBegin()); - uintptr_t *End = reinterpret_cast(ExtraCountersEnd()); - for (; Beg < End; Beg++) { - *Beg = 0; - __asm__ __volatile__("" : : : "memory"); - } -} - -} // namespace fuzzer - -#else -// TODO: implement for other platforms. -namespace fuzzer { -uint8_t *ExtraCountersBegin() { return nullptr; } -uint8_t *ExtraCountersEnd() { return nullptr; } -void ClearExtraCounters() {} -} // namespace fuzzer - -#endif diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerFlags.def llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerFlags.def --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerFlags.def 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerFlags.def 1970-01-01 00:00:00.000000000 +0000 @@ -1,139 +0,0 @@ -//===- FuzzerFlags.def - Run-time flags -------------------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the -// point of inclusion. We are not using any flag parsing library for better -// portability and independence. 
-//===----------------------------------------------------------------------===// -FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") -FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.") -FUZZER_FLAG_INT(runs, -1, - "Number of individual test runs (-1 for infinite runs).") -FUZZER_FLAG_INT(max_len, 0, "Maximum length of the test input. " - "If 0, libFuzzer tries to guess a good value based on the corpus " - "and reports it. ") -FUZZER_FLAG_INT(experimental_len_control, 0, "experimental flag") -FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") -FUZZER_FLAG_INT(mutate_depth, 5, - "Apply this number of consecutive mutations to each input.") -FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup") -FUZZER_FLAG_INT(prefer_small, 1, - "If 1, always prefer smaller inputs during the corpus shuffle.") -FUZZER_FLAG_INT( - timeout, 1200, - "Timeout in seconds (if positive). " - "If one unit runs more than this number of seconds the process will abort.") -FUZZER_FLAG_INT(error_exitcode, 77, "When libFuzzer itself reports a bug " - "this exit code will be used.") -FUZZER_FLAG_INT(timeout_exitcode, 77, "When libFuzzer reports a timeout " - "this exit code will be used.") -FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total " - "time in seconds to run the fuzzer.") -FUZZER_FLAG_INT(help, 0, "Print help.") -FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be " - "merged into the 1-st corpus. Only interesting units will be taken. " - "This flag can be used to minimize a corpus.") -FUZZER_FLAG_STRING(merge_control_file, "internal flag") -FUZZER_FLAG_STRING(save_coverage_summary, "Experimental:" - " save coverage summary to a given file." - " Used with -merge=1") -FUZZER_FLAG_STRING(load_coverage_summary, "Experimental:" - " load coverage summary from a given file." - " Treat this coverage as belonging to the first corpus. 
" - " Used with -merge=1") -FUZZER_FLAG_INT(minimize_crash, 0, "If 1, minimizes the provided" - " crash input. Use with -runs=N or -max_total_time=N to limit " - "the number attempts." - " Use with -exact_artifact_path to specify the output." - " Combine with ASAN_OPTIONS=dedup_token_length=3 (or similar) to ensure that" - " the minimized input triggers the same crash." - ) -FUZZER_FLAG_INT(cleanse_crash, 0, "If 1, tries to cleanse the provided" - " crash input to make it contain fewer original bytes." - " Use with -exact_artifact_path to specify the output." - ) -FUZZER_FLAG_INT(minimize_crash_internal_step, 0, "internal flag") -FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") -FUZZER_FLAG_INT(use_indir_calls, 1, "Use indirect caller-callee counters") -FUZZER_FLAG_INT(use_memmem, 1, - "Use hints from intercepting memmem, strstr, etc") -FUZZER_FLAG_INT(use_value_profile, 0, - "Experimental. Use value profile to guide fuzzing.") -FUZZER_FLAG_INT(use_cmp, 1, "Use CMP traces to guide mutations") -FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus inputs.") -FUZZER_FLAG_INT(reduce_inputs, 1, - "Try to reduce the size of inputs while preserving their full feature sets") -FUZZER_FLAG_UNSIGNED(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" - " this number of jobs in separate worker processes" - " with stdout/stderr redirected to fuzz-JOB.log.") -FUZZER_FLAG_UNSIGNED(workers, 0, - "Number of simultaneous worker processes to run the jobs." - " If zero, \"min(jobs,NumberOfCpuCores()/2)\" is used.") -FUZZER_FLAG_INT(reload, 1, - "Reload the main corpus every seconds to get new units" - " discovered by other processes. If 0, disabled") -FUZZER_FLAG_INT(report_slow_units, 10, - "Report slowest units if they run for more than this number of seconds.") -FUZZER_FLAG_INT(only_ascii, 0, - "If 1, generate only ASCII (isprint+isspace) inputs.") -FUZZER_FLAG_STRING(dict, "Experimental. 
Use the dictionary file.") -FUZZER_FLAG_STRING(artifact_prefix, "Write fuzzing artifacts (crash, " - "timeout, or slow inputs) as " - "$(artifact_prefix)file") -FUZZER_FLAG_STRING(exact_artifact_path, - "Write the single artifact on failure (crash, timeout) " - "as $(exact_artifact_path). This overrides -artifact_prefix " - "and will not use checksum in the file name. Do not " - "use the same path for several parallel processes.") -FUZZER_FLAG_INT(print_pcs, 0, "If 1, print out newly covered PCs.") -FUZZER_FLAG_INT(print_final_stats, 0, "If 1, print statistics at exit.") -FUZZER_FLAG_INT(print_corpus_stats, 0, - "If 1, print statistics on corpus elements at exit.") -FUZZER_FLAG_INT(print_coverage, 0, "If 1, print coverage information as text" - " at exit. To-be-deprecated.") -FUZZER_FLAG_INT(dump_coverage, 0, "If 1, dump coverage information as a" - " .sancov file at exit. To-be-deprecated.") -FUZZER_FLAG_INT(handle_segv, 1, "If 1, try to intercept SIGSEGV.") -FUZZER_FLAG_INT(handle_bus, 1, "If 1, try to intercept SIGBUS.") -FUZZER_FLAG_INT(handle_abrt, 1, "If 1, try to intercept SIGABRT.") -FUZZER_FLAG_INT(handle_ill, 1, "If 1, try to intercept SIGILL.") -FUZZER_FLAG_INT(handle_fpe, 1, "If 1, try to intercept SIGFPE.") -FUZZER_FLAG_INT(handle_int, 1, "If 1, try to intercept SIGINT.") -FUZZER_FLAG_INT(handle_term, 1, "If 1, try to intercept SIGTERM.") -FUZZER_FLAG_INT(handle_xfsz, 1, "If 1, try to intercept SIGXFSZ.") -FUZZER_FLAG_INT(close_fd_mask, 0, "If 1, close stdout at startup; " - "if 2, close stderr; if 3, close both. " - "Be careful, this will also close e.g. asan's stderr/stdout.") -FUZZER_FLAG_INT(detect_leaks, 1, "If 1, and if LeakSanitizer is enabled " - "try to detect memory leaks during fuzzing (i.e. not only at shut down).") -FUZZER_FLAG_INT(trace_malloc, 0, "If >= 1 will print all mallocs/frees. 
" - "If >= 2 will also print stack traces.") -FUZZER_FLAG_INT(rss_limit_mb, 2048, "If non-zero, the fuzzer will exit upon" - "reaching this limit of RSS memory usage.") -FUZZER_FLAG_STRING(exit_on_src_pos, "Exit if a newly found PC originates" - " from the given source location. Example: -exit_on_src_pos=foo.cc:123. " - "Used primarily for testing libFuzzer itself.") -FUZZER_FLAG_STRING(exit_on_item, "Exit if an item with a given sha1 sum" - " was added to the corpus. " - "Used primarily for testing libFuzzer itself.") -FUZZER_FLAG_INT(ignore_remaining_args, 0, "If 1, ignore all arguments passed " - "after this one. Useful for fuzzers that need to do their own " - "argument parsing.") - -FUZZER_FLAG_STRING(run_equivalence_server, "Experimental") -FUZZER_FLAG_STRING(use_equivalence_server, "Experimental") -FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") - -FUZZER_DEPRECATED_FLAG(exit_on_first) -FUZZER_DEPRECATED_FLAG(save_minimized_corpus) -FUZZER_DEPRECATED_FLAG(sync_command) -FUZZER_DEPRECATED_FLAG(sync_timeout) -FUZZER_DEPRECATED_FLAG(test_single_input) -FUZZER_DEPRECATED_FLAG(drill) -FUZZER_DEPRECATED_FLAG(truncate_units) -FUZZER_DEPRECATED_FLAG(output_csv) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerInterface.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerInterface.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerInterface.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerInterface.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,67 +0,0 @@ -//===- FuzzerInterface.h - Interface header for the Fuzzer ------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Define the interface between libFuzzer and the library being tested. 
-//===----------------------------------------------------------------------===// - -// NOTE: the libFuzzer interface is thin and in the majority of cases -// you should not include this file into your target. In 95% of cases -// all you need is to define the following function in your file: -// extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); - -// WARNING: keep the interface in C. - -#ifndef LLVM_FUZZER_INTERFACE_H -#define LLVM_FUZZER_INTERFACE_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Mandatory user-provided target function. -// Executes the code under test with [Data, Data+Size) as the input. -// libFuzzer will invoke this function *many* times with different inputs. -// Must return 0. -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); - -// Optional user-provided initialization function. -// If provided, this function will be called by libFuzzer once at startup. -// It may read and modify argc/argv. -// Must return 0. -int LLVMFuzzerInitialize(int *argc, char ***argv); - -// Optional user-provided custom mutator. -// Mutates raw data in [Data, Data+Size) inplace. -// Returns the new size, which is not greater than MaxSize. -// Given the same Seed produces the same mutation. -size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, size_t MaxSize, - unsigned int Seed); - -// Optional user-provided custom cross-over function. -// Combines pieces of Data1 & Data2 together into Out. -// Returns the new size, which is not greater than MaxOutSize. -// Should produce the same mutation given the same Seed. -size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize, - unsigned int Seed); - -// Experimental, may go away in future. -// libFuzzer-provided function to be used inside LLVMFuzzerCustomMutator. -// Mutates raw data in [Data, Data+Size) inplace. 
-// Returns the new size, which is not greater than MaxSize. -size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // LLVM_FUZZER_INTERFACE_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerInternal.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerInternal.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerInternal.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerInternal.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,150 +0,0 @@ -//===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Define the main class fuzzer::Fuzzer and most functions. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_INTERNAL_H -#define LLVM_FUZZER_INTERNAL_H - -#include "FuzzerDefs.h" -#include "FuzzerExtFunctions.h" -#include "FuzzerInterface.h" -#include "FuzzerOptions.h" -#include "FuzzerSHA1.h" -#include "FuzzerValueBitMap.h" -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -using namespace std::chrono; - -class Fuzzer { -public: - - Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, - FuzzingOptions Options); - ~Fuzzer(); - void Loop(); - void MinimizeCrashLoop(const Unit &U); - void ShuffleAndMinimize(UnitVector *V); - void RereadOutputCorpus(size_t MaxSize); - - size_t secondsSinceProcessStartUp() { - return duration_cast(system_clock::now() - ProcessStartTime) - .count(); - } - - bool TimedOut() { - return Options.MaxTotalTimeSec > 0 && - secondsSinceProcessStartUp() > - static_cast(Options.MaxTotalTimeSec); - } - - size_t execPerSec() { - 
size_t Seconds = secondsSinceProcessStartUp(); - return Seconds ? TotalNumberOfRuns / Seconds : 0; - } - - size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } - - static void StaticAlarmCallback(); - static void StaticCrashSignalCallback(); - static void StaticExitCallback(); - static void StaticInterruptCallback(); - static void StaticFileSizeExceedCallback(); - - void ExecuteCallback(const uint8_t *Data, size_t Size); - bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, - InputInfo *II = nullptr); - - // Merge Corpora[1:] into Corpora[0]. - void Merge(const std::vector &Corpora); - void CrashResistantMerge(const std::vector &Args, - const std::vector &Corpora, - const char *CoverageSummaryInputPathOrNull, - const char *CoverageSummaryOutputPathOrNull); - void CrashResistantMergeInternalStep(const std::string &ControlFilePath); - MutationDispatcher &GetMD() { return MD; } - void PrintFinalStats(); - void SetMaxInputLen(size_t MaxInputLen); - void SetMaxMutationLen(size_t MaxMutationLen); - void RssLimitCallback(); - - bool InFuzzingThread() const { return IsMyThread; } - size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const; - void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, - bool DuringInitialCorpusExecution); - - void HandleMalloc(size_t Size); - void AnnounceOutput(const uint8_t *Data, size_t Size); - -private: - void AlarmCallback(); - void CrashCallback(); - void ExitCallback(); - void CrashOnOverwrittenData(); - void InterruptCallback(); - void MutateAndTestOne(); - void ReportNewCoverage(InputInfo *II, const Unit &U); - void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size); - void WriteToOutputCorpus(const Unit &U); - void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); - void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0); - void PrintStatusForNewUnit(const Unit &U, const char *Text); - void ShuffleCorpus(UnitVector *V); - void 
CheckExitOnSrcPosOrItem(); - - static void StaticDeathCallback(); - void DumpCurrentUnit(const char *Prefix); - void DeathCallback(); - - void AllocateCurrentUnitData(); - uint8_t *CurrentUnitData = nullptr; - std::atomic CurrentUnitSize; - uint8_t BaseSha1[kSHA1NumBytes]; // Checksum of the base unit. - bool RunningCB = false; - - size_t TotalNumberOfRuns = 0; - size_t NumberOfNewUnitsAdded = 0; - - size_t LastCorpusUpdateRun = 0; - system_clock::time_point LastCorpusUpdateTime = system_clock::now(); - - - bool HasMoreMallocsThanFrees = false; - size_t NumberOfLeakDetectionAttempts = 0; - - UserCallback CB; - InputCorpus &Corpus; - MutationDispatcher &MD; - FuzzingOptions Options; - - system_clock::time_point ProcessStartTime = system_clock::now(); - system_clock::time_point UnitStartTime, UnitStopTime; - long TimeOfLongestUnitInSeconds = 0; - long EpochOfLastReadOfOutputCorpus = 0; - - size_t MaxInputLen = 0; - size_t MaxMutationLen = 0; - size_t TmpMaxMutationLen = 0; - - std::vector UniqFeatureSetTmp; - - // Need to know our own thread. - static thread_local bool IsMyThread; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_INTERNAL_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerIO.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerIO.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerIO.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerIO.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,120 +0,0 @@ -//===- FuzzerIO.cpp - IO utils. -------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// IO functions. 
-//===----------------------------------------------------------------------===// - -#include "FuzzerIO.h" -#include "FuzzerDefs.h" -#include "FuzzerExtFunctions.h" -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -static FILE *OutputFile = stderr; - -long GetEpoch(const std::string &Path) { - struct stat St; - if (stat(Path.c_str(), &St)) - return 0; // Can't stat, be conservative. - return St.st_mtime; -} - -Unit FileToVector(const std::string &Path, size_t MaxSize, bool ExitOnError) { - std::ifstream T(Path); - if (ExitOnError && !T) { - Printf("No such directory: %s; exiting\n", Path.c_str()); - exit(1); - } - - T.seekg(0, T.end); - auto EndPos = T.tellg(); - if (EndPos < 0) return {}; - size_t FileLen = EndPos; - if (MaxSize) - FileLen = std::min(FileLen, MaxSize); - - T.seekg(0, T.beg); - Unit Res(FileLen); - T.read(reinterpret_cast(Res.data()), FileLen); - return Res; -} - -std::string FileToString(const std::string &Path) { - std::ifstream T(Path); - return std::string((std::istreambuf_iterator(T)), - std::istreambuf_iterator()); -} - -void CopyFileToErr(const std::string &Path) { - Printf("%s", FileToString(Path).c_str()); -} - -void WriteToFile(const Unit &U, const std::string &Path) { - // Use raw C interface because this function may be called from a sig handler. - FILE *Out = fopen(Path.c_str(), "w"); - if (!Out) return; - fwrite(U.data(), sizeof(U[0]), U.size(), Out); - fclose(Out); -} - -void ReadDirToVectorOfUnits(const char *Path, std::vector *V, - long *Epoch, size_t MaxSize, bool ExitOnError) { - long E = Epoch ? 
*Epoch : 0; - std::vector Files; - ListFilesInDirRecursive(Path, Epoch, &Files, /*TopDir*/true); - size_t NumLoaded = 0; - for (size_t i = 0; i < Files.size(); i++) { - auto &X = Files[i]; - if (Epoch && GetEpoch(X) < E) continue; - NumLoaded++; - if ((NumLoaded & (NumLoaded - 1)) == 0 && NumLoaded >= 1024) - Printf("Loaded %zd/%zd files from %s\n", NumLoaded, Files.size(), Path); - auto S = FileToVector(X, MaxSize, ExitOnError); - if (!S.empty()) - V->push_back(S); - } -} - -std::string DirPlusFile(const std::string &DirPath, - const std::string &FileName) { - return DirPath + GetSeparator() + FileName; -} - -void DupAndCloseStderr() { - int OutputFd = DuplicateFile(2); - if (OutputFd > 0) { - FILE *NewOutputFile = OpenFile(OutputFd, "w"); - if (NewOutputFile) { - OutputFile = NewOutputFile; - if (EF->__sanitizer_set_report_fd) - EF->__sanitizer_set_report_fd( - reinterpret_cast(GetHandleFromFd(OutputFd))); - DiscardOutput(2); - } - } -} - -void CloseStdout() { - DiscardOutput(1); -} - -void Printf(const char *Fmt, ...) { - va_list ap; - va_start(ap, Fmt); - vfprintf(OutputFile, Fmt, ap); - va_end(ap); - fflush(OutputFile); -} - -} // namespace fuzzer diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerIO.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerIO.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerIO.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerIO.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -//===- FuzzerIO.h - Internal header for IO utils ----------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// IO interface. 
-//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_IO_H -#define LLVM_FUZZER_IO_H - -#include "FuzzerDefs.h" - -namespace fuzzer { - -long GetEpoch(const std::string &Path); - -Unit FileToVector(const std::string &Path, size_t MaxSize = 0, - bool ExitOnError = true); - -std::string FileToString(const std::string &Path); - -void CopyFileToErr(const std::string &Path); - -void WriteToFile(const Unit &U, const std::string &Path); - -void ReadDirToVectorOfUnits(const char *Path, std::vector *V, - long *Epoch, size_t MaxSize, bool ExitOnError); - -// Returns "Dir/FileName" or equivalent for the current OS. -std::string DirPlusFile(const std::string &DirPath, - const std::string &FileName); - -// Returns the name of the dir, similar to the 'dirname' utility. -std::string DirName(const std::string &FileName); - -// Returns path to a TmpDir. -std::string TmpDir(); - -bool IsInterestingCoverageFile(const std::string &FileName); - -void DupAndCloseStderr(); - -void CloseStdout(); - -void Printf(const char *Fmt, ...); - -// Print using raw syscalls, useful when printing at early init stages. 
-void RawPrint(const char *Str); - -// Platform specific functions: -bool IsFile(const std::string &Path); - -void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, - std::vector *V, bool TopDir); - -char GetSeparator(); - -FILE* OpenFile(int Fd, const char *Mode); - -int CloseFile(int Fd); - -int DuplicateFile(int Fd); - -void RemoveFile(const std::string &Path); - -void DiscardOutput(int Fd); - -intptr_t GetHandleFromFd(int fd); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_IO_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerIOPosix.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerIOPosix.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerIOPosix.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerIOPosix.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,123 +0,0 @@ -//===- FuzzerIOPosix.cpp - IO utils for Posix. ----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// IO functions implementation using Posix API. 
-//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_POSIX - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -bool IsFile(const std::string &Path) { - struct stat St; - if (stat(Path.c_str(), &St)) - return false; - return S_ISREG(St.st_mode); -} - -void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, - std::vector *V, bool TopDir) { - auto E = GetEpoch(Dir); - if (Epoch) - if (E && *Epoch >= E) return; - - DIR *D = opendir(Dir.c_str()); - if (!D) { - Printf("No such directory: %s; exiting\n", Dir.c_str()); - exit(1); - } - while (auto E = readdir(D)) { - std::string Path = DirPlusFile(Dir, E->d_name); - if (E->d_type == DT_REG || E->d_type == DT_LNK) - V->push_back(Path); - else if (E->d_type == DT_DIR && *E->d_name != '.') - ListFilesInDirRecursive(Path, Epoch, V, false); - } - closedir(D); - if (Epoch && TopDir) - *Epoch = E; -} - -char GetSeparator() { - return '/'; -} - -FILE* OpenFile(int Fd, const char* Mode) { - return fdopen(Fd, Mode); -} - -int CloseFile(int fd) { - return close(fd); -} - -int DuplicateFile(int Fd) { - return dup(Fd); -} - -void RemoveFile(const std::string &Path) { - unlink(Path.c_str()); -} - -void DiscardOutput(int Fd) { - FILE* Temp = fopen("/dev/null", "w"); - if (!Temp) - return; - dup2(fileno(Temp), Fd); - fclose(Temp); -} - -intptr_t GetHandleFromFd(int fd) { - return static_cast(fd); -} - -std::string DirName(const std::string &FileName) { - char *Tmp = new char[FileName.size() + 1]; - memcpy(Tmp, FileName.c_str(), FileName.size() + 1); - std::string Res = dirname(Tmp); - delete [] Tmp; - return Res; -} - -std::string TmpDir() { - if (auto Env = getenv("TMPDIR")) - return Env; - return "/tmp"; -} - -bool IsInterestingCoverageFile(const std::string &FileName) { - if (FileName.find("compiler-rt/lib/") != std::string::npos) - return 
false; // sanitizer internal. - if (FileName.find("/usr/lib/") != std::string::npos) - return false; - if (FileName.find("/usr/include/") != std::string::npos) - return false; - if (FileName == "") - return false; - return true; -} - - -void RawPrint(const char *Str) { - write(2, Str, strlen(Str)); -} - -} // namespace fuzzer - -#endif // LIBFUZZER_POSIX diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerIOWindows.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerIOWindows.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerIOWindows.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerIOWindows.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,323 +0,0 @@ -//===- FuzzerIOWindows.cpp - IO utils for Windows. ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// IO functions implementation for Windows. 
-//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -static bool IsFile(const std::string &Path, const DWORD &FileAttributes) { - - if (FileAttributes & FILE_ATTRIBUTE_NORMAL) - return true; - - if (FileAttributes & FILE_ATTRIBUTE_DIRECTORY) - return false; - - HANDLE FileHandle( - CreateFileA(Path.c_str(), 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, 0)); - - if (FileHandle == INVALID_HANDLE_VALUE) { - Printf("CreateFileA() failed for \"%s\" (Error code: %lu).\n", Path.c_str(), - GetLastError()); - return false; - } - - DWORD FileType = GetFileType(FileHandle); - - if (FileType == FILE_TYPE_UNKNOWN) { - Printf("GetFileType() failed for \"%s\" (Error code: %lu).\n", Path.c_str(), - GetLastError()); - CloseHandle(FileHandle); - return false; - } - - if (FileType != FILE_TYPE_DISK) { - CloseHandle(FileHandle); - return false; - } - - CloseHandle(FileHandle); - return true; -} - -bool IsFile(const std::string &Path) { - DWORD Att = GetFileAttributesA(Path.c_str()); - - if (Att == INVALID_FILE_ATTRIBUTES) { - Printf("GetFileAttributesA() failed for \"%s\" (Error code: %lu).\n", - Path.c_str(), GetLastError()); - return false; - } - - return IsFile(Path, Att); -} - -void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, - std::vector *V, bool TopDir) { - auto E = GetEpoch(Dir); - if (Epoch) - if (E && *Epoch >= E) return; - - std::string Path(Dir); - assert(!Path.empty()); - if (Path.back() != '\\') - Path.push_back('\\'); - Path.push_back('*'); - - // Get the first directory entry. 
- WIN32_FIND_DATAA FindInfo; - HANDLE FindHandle(FindFirstFileA(Path.c_str(), &FindInfo)); - if (FindHandle == INVALID_HANDLE_VALUE) - { - if (GetLastError() == ERROR_FILE_NOT_FOUND) - return; - Printf("No such directory: %s; exiting\n", Dir.c_str()); - exit(1); - } - - do { - std::string FileName = DirPlusFile(Dir, FindInfo.cFileName); - - if (FindInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { - size_t FilenameLen = strlen(FindInfo.cFileName); - if ((FilenameLen == 1 && FindInfo.cFileName[0] == '.') || - (FilenameLen == 2 && FindInfo.cFileName[0] == '.' && - FindInfo.cFileName[1] == '.')) - continue; - - ListFilesInDirRecursive(FileName, Epoch, V, false); - } - else if (IsFile(FileName, FindInfo.dwFileAttributes)) - V->push_back(FileName); - } while (FindNextFileA(FindHandle, &FindInfo)); - - DWORD LastError = GetLastError(); - if (LastError != ERROR_NO_MORE_FILES) - Printf("FindNextFileA failed (Error code: %lu).\n", LastError); - - FindClose(FindHandle); - - if (Epoch && TopDir) - *Epoch = E; -} - -char GetSeparator() { - return '\\'; -} - -FILE* OpenFile(int Fd, const char* Mode) { - return _fdopen(Fd, Mode); -} - -int CloseFile(int Fd) { - return _close(Fd); -} - -int DuplicateFile(int Fd) { - return _dup(Fd); -} - -void RemoveFile(const std::string &Path) { - _unlink(Path.c_str()); -} - -void DiscardOutput(int Fd) { - FILE* Temp = fopen("nul", "w"); - if (!Temp) - return; - _dup2(_fileno(Temp), Fd); - fclose(Temp); -} - -intptr_t GetHandleFromFd(int fd) { - return _get_osfhandle(fd); -} - -static bool IsSeparator(char C) { - return C == '\\' || C == '/'; -} - -// Parse disk designators, like "C:\". If Relative == true, also accepts: "C:". -// Returns number of characters considered if successful. 
-static size_t ParseDrive(const std::string &FileName, const size_t Offset, - bool Relative = true) { - if (Offset + 1 >= FileName.size() || FileName[Offset + 1] != ':') - return 0; - if (Offset + 2 >= FileName.size() || !IsSeparator(FileName[Offset + 2])) { - if (!Relative) // Accept relative path? - return 0; - else - return 2; - } - return 3; -} - -// Parse a file name, like: SomeFile.txt -// Returns number of characters considered if successful. -static size_t ParseFileName(const std::string &FileName, const size_t Offset) { - size_t Pos = Offset; - const size_t End = FileName.size(); - for(; Pos < End && !IsSeparator(FileName[Pos]); ++Pos) - ; - return Pos - Offset; -} - -// Parse a directory ending in separator, like: `SomeDir\` -// Returns number of characters considered if successful. -static size_t ParseDir(const std::string &FileName, const size_t Offset) { - size_t Pos = Offset; - const size_t End = FileName.size(); - if (Pos >= End || IsSeparator(FileName[Pos])) - return 0; - for(; Pos < End && !IsSeparator(FileName[Pos]); ++Pos) - ; - if (Pos >= End) - return 0; - ++Pos; // Include separator. - return Pos - Offset; -} - -// Parse a servername and share, like: `SomeServer\SomeShare\` -// Returns number of characters considered if successful. -static size_t ParseServerAndShare(const std::string &FileName, - const size_t Offset) { - size_t Pos = Offset, Res; - if (!(Res = ParseDir(FileName, Pos))) - return 0; - Pos += Res; - if (!(Res = ParseDir(FileName, Pos))) - return 0; - Pos += Res; - return Pos - Offset; -} - -// Parse the given Ref string from the position Offset, to exactly match the given -// string Patt. -// Returns number of characters considered if successful. -static size_t ParseCustomString(const std::string &Ref, size_t Offset, - const char *Patt) { - size_t Len = strlen(Patt); - if (Offset + Len > Ref.size()) - return 0; - return Ref.compare(Offset, Len, Patt) == 0 ? 
Len : 0; -} - -// Parse a location, like: -// \\?\UNC\Server\Share\ \\?\C:\ \\Server\Share\ \ C:\ C: -// Returns number of characters considered if successful. -static size_t ParseLocation(const std::string &FileName) { - size_t Pos = 0, Res; - - if ((Res = ParseCustomString(FileName, Pos, R"(\\?\)"))) { - Pos += Res; - if ((Res = ParseCustomString(FileName, Pos, R"(UNC\)"))) { - Pos += Res; - if ((Res = ParseServerAndShare(FileName, Pos))) - return Pos + Res; - return 0; - } - if ((Res = ParseDrive(FileName, Pos, false))) - return Pos + Res; - return 0; - } - - if (Pos < FileName.size() && IsSeparator(FileName[Pos])) { - ++Pos; - if (Pos < FileName.size() && IsSeparator(FileName[Pos])) { - ++Pos; - if ((Res = ParseServerAndShare(FileName, Pos))) - return Pos + Res; - return 0; - } - return Pos; - } - - if ((Res = ParseDrive(FileName, Pos))) - return Pos + Res; - - return Pos; -} - -std::string DirName(const std::string &FileName) { - size_t LocationLen = ParseLocation(FileName); - size_t DirLen = 0, Res; - while ((Res = ParseDir(FileName, LocationLen + DirLen))) - DirLen += Res; - size_t FileLen = ParseFileName(FileName, LocationLen + DirLen); - - if (LocationLen + DirLen + FileLen != FileName.size()) { - Printf("DirName() failed for \"%s\", invalid path.\n", FileName.c_str()); - exit(1); - } - - if (DirLen) { - --DirLen; // Remove trailing separator. - if (!FileLen) { // Path ended in separator. - assert(DirLen); - // Remove file name from Dir. - while (DirLen && !IsSeparator(FileName[LocationLen + DirLen - 1])) - --DirLen; - if (DirLen) // Remove trailing separator. - --DirLen; - } - } - - if (!LocationLen) { // Relative path. 
- if (!DirLen) - return "."; - return std::string(".\\").append(FileName, 0, DirLen); - } - - return FileName.substr(0, LocationLen + DirLen); -} - -std::string TmpDir() { - std::string Tmp; - Tmp.resize(MAX_PATH + 1); - DWORD Size = GetTempPathA(Tmp.size(), &Tmp[0]); - if (Size == 0) { - Printf("Couldn't get Tmp path.\n"); - exit(1); - } - Tmp.resize(Size); - return Tmp; -} - -bool IsInterestingCoverageFile(const std::string &FileName) { - if (FileName.find("Program Files") != std::string::npos) - return false; - if (FileName.find("compiler-rt\\lib\\") != std::string::npos) - return false; // sanitizer internal. - if (FileName == "") - return false; - return true; -} - -void RawPrint(const char *Str) { - // Not tested, may or may not work. Fix if needed. - Printf("%s", Str); -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerLoop.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerLoop.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerLoop.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerLoop.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,722 +0,0 @@ -//===- FuzzerLoop.cpp - Fuzzer's main loop --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Fuzzer's main loop. 
-//===----------------------------------------------------------------------===// - -#include "FuzzerCorpus.h" -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include "FuzzerMutate.h" -#include "FuzzerRandom.h" -#include "FuzzerShmem.h" -#include "FuzzerTracePC.h" -#include -#include -#include -#include - -#if defined(__has_include) -#if __has_include() -#include -#endif -#endif - -#define NO_SANITIZE_MEMORY -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#undef NO_SANITIZE_MEMORY -#define NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory)) -#endif -#endif - -namespace fuzzer { -static const size_t kMaxUnitSizeToPrint = 256; - -thread_local bool Fuzzer::IsMyThread; - -SharedMemoryRegion SMR; - -// Only one Fuzzer per process. -static Fuzzer *F; - -// Leak detection is expensive, so we first check if there were more mallocs -// than frees (using the sanitizer malloc hooks) and only then try to call lsan. -struct MallocFreeTracer { - void Start(int TraceLevel) { - this->TraceLevel = TraceLevel; - if (TraceLevel) - Printf("MallocFreeTracer: START\n"); - Mallocs = 0; - Frees = 0; - } - // Returns true if there were more mallocs than frees. - bool Stop() { - if (TraceLevel) - Printf("MallocFreeTracer: STOP %zd %zd (%s)\n", Mallocs.load(), - Frees.load(), Mallocs == Frees ? 
"same" : "DIFFERENT"); - bool Result = Mallocs > Frees; - Mallocs = 0; - Frees = 0; - TraceLevel = 0; - return Result; - } - std::atomic Mallocs; - std::atomic Frees; - int TraceLevel = 0; -}; - -static MallocFreeTracer AllocTracer; - -ATTRIBUTE_NO_SANITIZE_MEMORY -void MallocHook(const volatile void *ptr, size_t size) { - size_t N = AllocTracer.Mallocs++; - F->HandleMalloc(size); - if (int TraceLevel = AllocTracer.TraceLevel) { - Printf("MALLOC[%zd] %p %zd\n", N, ptr, size); - if (TraceLevel >= 2 && EF) - EF->__sanitizer_print_stack_trace(); - } -} - -ATTRIBUTE_NO_SANITIZE_MEMORY -void FreeHook(const volatile void *ptr) { - size_t N = AllocTracer.Frees++; - if (int TraceLevel = AllocTracer.TraceLevel) { - Printf("FREE[%zd] %p\n", N, ptr); - if (TraceLevel >= 2 && EF) - EF->__sanitizer_print_stack_trace(); - } -} - -// Crash on a single malloc that exceeds the rss limit. -void Fuzzer::HandleMalloc(size_t Size) { - if (!Options.RssLimitMb || (Size >> 20) < (size_t)Options.RssLimitMb) - return; - Printf("==%d== ERROR: libFuzzer: out-of-memory (malloc(%zd))\n", GetPid(), - Size); - Printf(" To change the out-of-memory limit use -rss_limit_mb=\n\n"); - if (EF->__sanitizer_print_stack_trace) - EF->__sanitizer_print_stack_trace(); - DumpCurrentUnit("oom-"); - Printf("SUMMARY: libFuzzer: out-of-memory\n"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); // Stop right now. 
-} - -Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, - FuzzingOptions Options) - : CB(CB), Corpus(Corpus), MD(MD), Options(Options) { - if (EF->__sanitizer_set_death_callback) - EF->__sanitizer_set_death_callback(StaticDeathCallback); - assert(!F); - F = this; - TPC.ResetMaps(); - IsMyThread = true; - if (Options.DetectLeaks && EF->__sanitizer_install_malloc_and_free_hooks) - EF->__sanitizer_install_malloc_and_free_hooks(MallocHook, FreeHook); - TPC.SetUseCounters(Options.UseCounters); - TPC.SetUseValueProfile(Options.UseValueProfile); - - if (Options.Verbosity) - TPC.PrintModuleInfo(); - if (!Options.OutputCorpus.empty() && Options.ReloadIntervalSec) - EpochOfLastReadOfOutputCorpus = GetEpoch(Options.OutputCorpus); - MaxInputLen = MaxMutationLen = Options.MaxLen; - TmpMaxMutationLen = Max(size_t(4), Corpus.MaxInputSize()); - AllocateCurrentUnitData(); - CurrentUnitSize = 0; - memset(BaseSha1, 0, sizeof(BaseSha1)); -} - -Fuzzer::~Fuzzer() { } - -void Fuzzer::AllocateCurrentUnitData() { - if (CurrentUnitData || MaxInputLen == 0) return; - CurrentUnitData = new uint8_t[MaxInputLen]; -} - -void Fuzzer::StaticDeathCallback() { - assert(F); - F->DeathCallback(); -} - -void Fuzzer::DumpCurrentUnit(const char *Prefix) { - if (!CurrentUnitData) return; // Happens when running individual inputs. 
- MD.PrintMutationSequence(); - Printf("; base unit: %s\n", Sha1ToString(BaseSha1).c_str()); - size_t UnitSize = CurrentUnitSize; - if (UnitSize <= kMaxUnitSizeToPrint) { - PrintHexArray(CurrentUnitData, UnitSize, "\n"); - PrintASCII(CurrentUnitData, UnitSize, "\n"); - } - WriteUnitToFileWithPrefix({CurrentUnitData, CurrentUnitData + UnitSize}, - Prefix); -} - -NO_SANITIZE_MEMORY -void Fuzzer::DeathCallback() { - DumpCurrentUnit("crash-"); - PrintFinalStats(); -} - -void Fuzzer::StaticAlarmCallback() { - assert(F); - F->AlarmCallback(); -} - -void Fuzzer::StaticCrashSignalCallback() { - assert(F); - F->CrashCallback(); -} - -void Fuzzer::StaticExitCallback() { - assert(F); - F->ExitCallback(); -} - -void Fuzzer::StaticInterruptCallback() { - assert(F); - F->InterruptCallback(); -} - -void Fuzzer::StaticFileSizeExceedCallback() { - Printf("==%lu== ERROR: libFuzzer: file size exceeded\n", GetPid()); - exit(1); -} - -void Fuzzer::CrashCallback() { - Printf("==%lu== ERROR: libFuzzer: deadly signal\n", GetPid()); - if (EF->__sanitizer_print_stack_trace) - EF->__sanitizer_print_stack_trace(); - Printf("NOTE: libFuzzer has rudimentary signal handlers.\n" - " Combine libFuzzer with AddressSanitizer or similar for better " - "crash reports.\n"); - Printf("SUMMARY: libFuzzer: deadly signal\n"); - DumpCurrentUnit("crash-"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); // Stop right now. 
-} - -void Fuzzer::ExitCallback() { - if (!RunningCB) - return; // This exit did not come from the user callback - Printf("==%lu== ERROR: libFuzzer: fuzz target exited\n", GetPid()); - if (EF->__sanitizer_print_stack_trace) - EF->__sanitizer_print_stack_trace(); - Printf("SUMMARY: libFuzzer: fuzz target exited\n"); - DumpCurrentUnit("crash-"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); -} - - -void Fuzzer::InterruptCallback() { - Printf("==%lu== libFuzzer: run interrupted; exiting\n", GetPid()); - PrintFinalStats(); - _Exit(0); // Stop right now, don't perform any at-exit actions. -} - -NO_SANITIZE_MEMORY -void Fuzzer::AlarmCallback() { - assert(Options.UnitTimeoutSec > 0); - // In Windows Alarm callback is executed by a different thread. -#if !LIBFUZZER_WINDOWS - if (!InFuzzingThread()) return; -#endif - if (!RunningCB) - return; // We have not started running units yet. - size_t Seconds = - duration_cast(system_clock::now() - UnitStartTime).count(); - if (Seconds == 0) - return; - if (Options.Verbosity >= 2) - Printf("AlarmCallback %zd\n", Seconds); - if (Seconds >= (size_t)Options.UnitTimeoutSec) { - Printf("ALARM: working on the last Unit for %zd seconds\n", Seconds); - Printf(" and the timeout value is %d (use -timeout=N to change)\n", - Options.UnitTimeoutSec); - DumpCurrentUnit("timeout-"); - Printf("==%lu== ERROR: libFuzzer: timeout after %d seconds\n", GetPid(), - Seconds); - if (EF->__sanitizer_print_stack_trace) - EF->__sanitizer_print_stack_trace(); - Printf("SUMMARY: libFuzzer: timeout\n"); - PrintFinalStats(); - _Exit(Options.TimeoutExitCode); // Stop right now. 
- } -} - -void Fuzzer::RssLimitCallback() { - Printf( - "==%lu== ERROR: libFuzzer: out-of-memory (used: %zdMb; limit: %zdMb)\n", - GetPid(), GetPeakRSSMb(), Options.RssLimitMb); - Printf(" To change the out-of-memory limit use -rss_limit_mb=\n\n"); - if (EF->__sanitizer_print_memory_profile) - EF->__sanitizer_print_memory_profile(95, 8); - DumpCurrentUnit("oom-"); - Printf("SUMMARY: libFuzzer: out-of-memory\n"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); // Stop right now. -} - -void Fuzzer::PrintStats(const char *Where, const char *End, size_t Units) { - size_t ExecPerSec = execPerSec(); - if (!Options.Verbosity) - return; - Printf("#%zd\t%s", TotalNumberOfRuns, Where); - if (size_t N = TPC.GetTotalPCCoverage()) - Printf(" cov: %zd", N); - if (size_t N = Corpus.NumFeatures()) - Printf( " ft: %zd", N); - if (!Corpus.empty()) { - Printf(" corp: %zd", Corpus.NumActiveUnits()); - if (size_t N = Corpus.SizeInBytes()) { - if (N < (1<<14)) - Printf("/%zdb", N); - else if (N < (1 << 24)) - Printf("/%zdKb", N >> 10); - else - Printf("/%zdMb", N >> 20); - } - } - if (Units) - Printf(" units: %zd", Units); - - Printf(" exec/s: %zd", ExecPerSec); - Printf(" rss: %zdMb", GetPeakRSSMb()); - Printf("%s", End); -} - -void Fuzzer::PrintFinalStats() { - if (Options.PrintCoverage) - TPC.PrintCoverage(); - if (Options.DumpCoverage) - TPC.DumpCoverage(); - if (Options.PrintCorpusStats) - Corpus.PrintStats(); - if (!Options.PrintFinalStats) return; - size_t ExecPerSec = execPerSec(); - Printf("stat::number_of_executed_units: %zd\n", TotalNumberOfRuns); - Printf("stat::average_exec_per_sec: %zd\n", ExecPerSec); - Printf("stat::new_units_added: %zd\n", NumberOfNewUnitsAdded); - Printf("stat::slowest_unit_time_sec: %zd\n", TimeOfLongestUnitInSeconds); - Printf("stat::peak_rss_mb: %zd\n", GetPeakRSSMb()); -} - -void Fuzzer::SetMaxInputLen(size_t MaxInputLen) { - assert(this->MaxInputLen == 0); // Can only reset MaxInputLen from 0 to non-0. 
- assert(MaxInputLen); - this->MaxInputLen = MaxInputLen; - this->MaxMutationLen = MaxInputLen; - AllocateCurrentUnitData(); - Printf("INFO: -max_len is not provided; " - "libFuzzer will not generate inputs larger than %zd bytes\n", - MaxInputLen); -} - -void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) { - assert(MaxMutationLen && MaxMutationLen <= MaxInputLen); - this->MaxMutationLen = MaxMutationLen; -} - -void Fuzzer::CheckExitOnSrcPosOrItem() { - if (!Options.ExitOnSrcPos.empty()) { - static auto *PCsSet = new std::set; - auto HandlePC = [&](uintptr_t PC) { - if (!PCsSet->insert(PC).second) return; - std::string Descr = DescribePC("%F %L", PC + 1); - if (Descr.find(Options.ExitOnSrcPos) != std::string::npos) { - Printf("INFO: found line matching '%s', exiting.\n", - Options.ExitOnSrcPos.c_str()); - _Exit(0); - } - }; - TPC.ForEachObservedPC(HandlePC); - } - if (!Options.ExitOnItem.empty()) { - if (Corpus.HasUnit(Options.ExitOnItem)) { - Printf("INFO: found item with checksum '%s', exiting.\n", - Options.ExitOnItem.c_str()); - _Exit(0); - } - } -} - -void Fuzzer::RereadOutputCorpus(size_t MaxSize) { - if (Options.OutputCorpus.empty() || !Options.ReloadIntervalSec) return; - std::vector AdditionalCorpus; - ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus, - &EpochOfLastReadOfOutputCorpus, MaxSize, - /*ExitOnError*/ false); - if (Options.Verbosity >= 2) - Printf("Reload: read %zd new units.\n", AdditionalCorpus.size()); - bool Reloaded = false; - for (auto &U : AdditionalCorpus) { - if (U.size() > MaxSize) - U.resize(MaxSize); - if (!Corpus.HasUnit(U)) { - if (RunOne(U.data(), U.size())) { - CheckExitOnSrcPosOrItem(); - Reloaded = true; - } - } - } - if (Reloaded) - PrintStats("RELOAD"); -} - -void Fuzzer::ShuffleCorpus(UnitVector *V) { - std::shuffle(V->begin(), V->end(), MD.GetRand()); - if (Options.PreferSmall) - std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) { - return A.size() < B.size(); - }); -} - -void 
Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { - Printf("#0\tREAD units: %zd\n", InitialCorpus->size()); - if (Options.ShuffleAtStartUp) - ShuffleCorpus(InitialCorpus); - - // Test the callback with empty input and never try it again. - uint8_t dummy; - ExecuteCallback(&dummy, 0); - - for (auto &U : *InitialCorpus) { - RunOne(U.data(), U.size()); - CheckExitOnSrcPosOrItem(); - TryDetectingAMemoryLeak(U.data(), U.size(), - /*DuringInitialCorpusExecution*/ true); - U.clear(); - } - PrintStats("INITED"); - if (Corpus.empty()) { - Printf("ERROR: no interesting inputs were found. " - "Is the code instrumented for coverage? Exiting.\n"); - exit(1); - } -} - -void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) { - auto TimeOfUnit = - duration_cast(UnitStopTime - UnitStartTime).count(); - if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && - secondsSinceProcessStartUp() >= 2) - PrintStats("pulse "); - if (TimeOfUnit > TimeOfLongestUnitInSeconds * 1.1 && - TimeOfUnit >= Options.ReportSlowUnits) { - TimeOfLongestUnitInSeconds = TimeOfUnit; - Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds); - WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-"); - } -} - -bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, - InputInfo *II) { - if (!Size) return false; - - ExecuteCallback(Data, Size); - - UniqFeatureSetTmp.clear(); - size_t FoundUniqFeaturesOfII = 0; - size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); - TPC.CollectFeatures([&](size_t Feature) { - if (Corpus.AddFeature(Feature, Size, Options.Shrink)) - UniqFeatureSetTmp.push_back(Feature); - if (Options.ReduceInputs && II) - if (std::binary_search(II->UniqFeatureSet.begin(), - II->UniqFeatureSet.end(), Feature)) - FoundUniqFeaturesOfII++; - }); - PrintPulseAndReportSlowInput(Data, Size); - size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; - if (NumNewFeatures) { - TPC.UpdateObservedPCs(); - Corpus.AddToCorpus({Data, Data + 
Size}, NumNewFeatures, MayDeleteFile, - UniqFeatureSetTmp); - return true; - } - if (II && FoundUniqFeaturesOfII && - FoundUniqFeaturesOfII == II->UniqFeatureSet.size() && - II->U.size() > Size) { - Corpus.Replace(II, {Data, Data + Size}); - return true; - } - return false; -} - -size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const { - assert(InFuzzingThread()); - *Data = CurrentUnitData; - return CurrentUnitSize; -} - -void Fuzzer::CrashOnOverwrittenData() { - Printf("==%d== ERROR: libFuzzer: fuzz target overwrites it's const input\n", - GetPid()); - DumpCurrentUnit("crash-"); - Printf("SUMMARY: libFuzzer: out-of-memory\n"); - _Exit(Options.ErrorExitCode); // Stop right now. -} - -// Compare two arrays, but not all bytes if the arrays are large. -static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) { - const size_t Limit = 64; - if (Size <= 64) - return !memcmp(A, B, Size); - // Compare first and last Limit/2 bytes. - return !memcmp(A, B, Limit / 2) && - !memcmp(A + Size - Limit / 2, B + Size - Limit / 2, Limit / 2); -} - -void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { - TPC.RecordInitialStack(); - TotalNumberOfRuns++; - assert(InFuzzingThread()); - if (SMR.IsClient()) - SMR.WriteByteArray(Data, Size); - // We copy the contents of Unit into a separate heap buffer - // so that we reliably find buffer overflows in it. 
- uint8_t *DataCopy = new uint8_t[Size]; - memcpy(DataCopy, Data, Size); - if (CurrentUnitData && CurrentUnitData != Data) - memcpy(CurrentUnitData, Data, Size); - CurrentUnitSize = Size; - AllocTracer.Start(Options.TraceMalloc); - UnitStartTime = system_clock::now(); - TPC.ResetMaps(); - RunningCB = true; - int Res = CB(DataCopy, Size); - RunningCB = false; - UnitStopTime = system_clock::now(); - (void)Res; - assert(Res == 0); - HasMoreMallocsThanFrees = AllocTracer.Stop(); - if (!LooseMemeq(DataCopy, Data, Size)) - CrashOnOverwrittenData(); - CurrentUnitSize = 0; - delete[] DataCopy; -} - -void Fuzzer::WriteToOutputCorpus(const Unit &U) { - if (Options.OnlyASCII) - assert(IsASCII(U)); - if (Options.OutputCorpus.empty()) - return; - std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U)); - WriteToFile(U, Path); - if (Options.Verbosity >= 2) - Printf("Written %zd bytes to %s\n", U.size(), Path.c_str()); -} - -void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { - if (!Options.SaveArtifacts) - return; - std::string Path = Options.ArtifactPrefix + Prefix + Hash(U); - if (!Options.ExactArtifactPath.empty()) - Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix. - WriteToFile(U, Path); - Printf("artifact_prefix='%s'; Test unit written to %s\n", - Options.ArtifactPrefix.c_str(), Path.c_str()); - if (U.size() <= kMaxUnitSizeToPrint) - Printf("Base64: %s\n", Base64(U).c_str()); -} - -void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) { - if (!Options.PrintNEW) - return; - PrintStats(Text, ""); - if (Options.Verbosity) { - Printf(" L: %zd/%zd ", U.size(), Corpus.MaxInputSize()); - MD.PrintMutationSequence(); - Printf("\n"); - } -} - -void Fuzzer::ReportNewCoverage(InputInfo *II, const Unit &U) { - II->NumSuccessfullMutations++; - MD.RecordSuccessfulMutationSequence(); - PrintStatusForNewUnit(U, II->Reduced ? 
"REDUCE" : - "NEW "); - WriteToOutputCorpus(U); - NumberOfNewUnitsAdded++; - CheckExitOnSrcPosOrItem(); // Check only after the unit is saved to corpus. - LastCorpusUpdateRun = TotalNumberOfRuns; - LastCorpusUpdateTime = system_clock::now(); -} - -// Tries detecting a memory leak on the particular input that we have just -// executed before calling this function. -void Fuzzer::TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, - bool DuringInitialCorpusExecution) { - if (!HasMoreMallocsThanFrees) return; // mallocs==frees, a leak is unlikely. - if (!Options.DetectLeaks) return; - if (!&(EF->__lsan_enable) || !&(EF->__lsan_disable) || - !(EF->__lsan_do_recoverable_leak_check)) - return; // No lsan. - // Run the target once again, but with lsan disabled so that if there is - // a real leak we do not report it twice. - EF->__lsan_disable(); - ExecuteCallback(Data, Size); - EF->__lsan_enable(); - if (!HasMoreMallocsThanFrees) return; // a leak is unlikely. - if (NumberOfLeakDetectionAttempts++ > 1000) { - Options.DetectLeaks = false; - Printf("INFO: libFuzzer disabled leak detection after every mutation.\n" - " Most likely the target function accumulates allocated\n" - " memory in a global state w/o actually leaking it.\n" - " You may try running this binary with -trace_malloc=[12]" - " to get a trace of mallocs and frees.\n" - " If LeakSanitizer is enabled in this process it will still\n" - " run on the process shutdown.\n"); - return; - } - // Now perform the actual lsan pass. This is expensive and we must ensure - // we don't call it too often. - if (EF->__lsan_do_recoverable_leak_check()) { // Leak is found, report it. - if (DuringInitialCorpusExecution) - Printf("\nINFO: a leak has been found in the initial corpus.\n\n"); - Printf("INFO: to ignore leaks on libFuzzer side use -detect_leaks=0.\n\n"); - CurrentUnitSize = Size; - DumpCurrentUnit("leak-"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); // not exit() to disable lsan further on. 
- } -} - -void Fuzzer::MutateAndTestOne() { - MD.StartMutationSequence(); - - auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); - const auto &U = II.U; - memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1)); - assert(CurrentUnitData); - size_t Size = U.size(); - assert(Size <= MaxInputLen && "Oversized Unit"); - memcpy(CurrentUnitData, U.data(), Size); - - assert(MaxMutationLen > 0); - - size_t CurrentMaxMutationLen = - Min(MaxMutationLen, Max(U.size(), TmpMaxMutationLen)); - assert(CurrentMaxMutationLen > 0); - - for (int i = 0; i < Options.MutateDepth; i++) { - if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) - break; - size_t NewSize = 0; - NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen); - assert(NewSize > 0 && "Mutator returned empty unit"); - assert(NewSize <= CurrentMaxMutationLen && "Mutator return overisized unit"); - Size = NewSize; - II.NumExecutedMutations++; - if (RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II)) - ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size}); - - TryDetectingAMemoryLeak(CurrentUnitData, Size, - /*DuringInitialCorpusExecution*/ false); - } -} - -void Fuzzer::Loop() { - TPC.SetPrintNewPCs(Options.PrintNewCovPcs); - system_clock::time_point LastCorpusReload = system_clock::now(); - if (Options.DoCrossOver) - MD.SetCorpus(&Corpus); - while (true) { - auto Now = system_clock::now(); - if (duration_cast(Now - LastCorpusReload).count() >= - Options.ReloadIntervalSec) { - RereadOutputCorpus(MaxInputLen); - LastCorpusReload = system_clock::now(); - } - if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) - break; - if (TimedOut()) break; - - // Update TmpMaxMutationLen - if (Options.ExperimentalLenControl) { - if (TmpMaxMutationLen < MaxMutationLen && - (TotalNumberOfRuns - LastCorpusUpdateRun > 1000 && - duration_cast(Now - LastCorpusUpdateTime).count() >= 1)) { - LastCorpusUpdateRun = TotalNumberOfRuns; - LastCorpusUpdateTime = Now; - TmpMaxMutationLen = - Min(MaxMutationLen, - TmpMaxMutationLen + 
Max(size_t(4), TmpMaxMutationLen / 8)); - if (TmpMaxMutationLen <= MaxMutationLen) - Printf("#%zd\tTEMP_MAX_LEN: %zd\n", TotalNumberOfRuns, - TmpMaxMutationLen); - } - } else { - TmpMaxMutationLen = MaxMutationLen; - } - - // Perform several mutations and runs. - MutateAndTestOne(); - } - - PrintStats("DONE ", "\n"); - MD.PrintRecommendedDictionary(); -} - -void Fuzzer::MinimizeCrashLoop(const Unit &U) { - if (U.size() <= 1) return; - while (!TimedOut() && TotalNumberOfRuns < Options.MaxNumberOfRuns) { - MD.StartMutationSequence(); - memcpy(CurrentUnitData, U.data(), U.size()); - for (int i = 0; i < Options.MutateDepth; i++) { - size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen); - assert(NewSize > 0 && NewSize <= MaxMutationLen); - ExecuteCallback(CurrentUnitData, NewSize); - PrintPulseAndReportSlowInput(CurrentUnitData, NewSize); - TryDetectingAMemoryLeak(CurrentUnitData, NewSize, - /*DuringInitialCorpusExecution*/ false); - } - } -} - -void Fuzzer::AnnounceOutput(const uint8_t *Data, size_t Size) { - if (SMR.IsServer()) { - SMR.WriteByteArray(Data, Size); - } else if (SMR.IsClient()) { - SMR.PostClient(); - SMR.WaitServer(); - size_t OtherSize = SMR.ReadByteArraySize(); - uint8_t *OtherData = SMR.GetByteArray(); - if (Size != OtherSize || memcmp(Data, OtherData, Size) != 0) { - size_t i = 0; - for (i = 0; i < Min(Size, OtherSize); i++) - if (Data[i] != OtherData[i]) - break; - Printf("==%lu== ERROR: libFuzzer: equivalence-mismatch. 
Sizes: %zd %zd; " - "offset %zd\n", GetPid(), Size, OtherSize, i); - DumpCurrentUnit("mismatch-"); - Printf("SUMMARY: libFuzzer: equivalence-mismatch\n"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); - } - } -} - -} // namespace fuzzer - -extern "C" { - -size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { - assert(fuzzer::F); - return fuzzer::F->GetMD().DefaultMutate(Data, Size, MaxSize); -} - -// Experimental -void LLVMFuzzerAnnounceOutput(const uint8_t *Data, size_t Size) { - assert(fuzzer::F); - fuzzer::F->AnnounceOutput(Data, Size); -} -} // extern "C" diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMain.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMain.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMain.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMain.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -//===- FuzzerMain.cpp - main() function and flags -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// main() and flags. -//===----------------------------------------------------------------------===// - -#include "FuzzerDefs.h" - -extern "C" { -// This function should be defined by the user. 
-int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); -} // extern "C" - -int main(int argc, char **argv) { - return fuzzer::FuzzerDriver(&argc, &argv, LLVMFuzzerTestOneInput); -} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMerge.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMerge.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMerge.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMerge.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,338 +0,0 @@ -//===- FuzzerMerge.cpp - merging corpora ----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Merging corpora. -//===----------------------------------------------------------------------===// - -#include "FuzzerMerge.h" -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include "FuzzerTracePC.h" -#include "FuzzerUtil.h" - -#include -#include -#include -#include - -namespace fuzzer { - -bool Merger::Parse(const std::string &Str, bool ParseCoverage) { - std::istringstream SS(Str); - return Parse(SS, ParseCoverage); -} - -void Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) { - if (!Parse(IS, ParseCoverage)) { - Printf("MERGE: failed to parse the control file (unexpected error)\n"); - exit(1); - } -} - -// The control file example: -// -// 3 # The number of inputs -// 1 # The number of inputs in the first corpus, <= the previous number -// file0 -// file1 -// file2 # One file name per line. -// STARTED 0 123 # FileID, file size -// DONE 0 1 4 6 8 # FileID COV1 COV2 ... -// STARTED 1 456 # If DONE is missing, the input crashed while processing. 
-// STARTED 2 567 -// DONE 2 8 9 -bool Merger::Parse(std::istream &IS, bool ParseCoverage) { - LastFailure.clear(); - std::string Line; - - // Parse NumFiles. - if (!std::getline(IS, Line, '\n')) return false; - std::istringstream L1(Line); - size_t NumFiles = 0; - L1 >> NumFiles; - if (NumFiles == 0 || NumFiles > 10000000) return false; - - // Parse NumFilesInFirstCorpus. - if (!std::getline(IS, Line, '\n')) return false; - std::istringstream L2(Line); - NumFilesInFirstCorpus = NumFiles + 1; - L2 >> NumFilesInFirstCorpus; - if (NumFilesInFirstCorpus > NumFiles) return false; - - // Parse file names. - Files.resize(NumFiles); - for (size_t i = 0; i < NumFiles; i++) - if (!std::getline(IS, Files[i].Name, '\n')) - return false; - - // Parse STARTED and DONE lines. - size_t ExpectedStartMarker = 0; - const size_t kInvalidStartMarker = -1; - size_t LastSeenStartMarker = kInvalidStartMarker; - std::vector TmpFeatures; - while (std::getline(IS, Line, '\n')) { - std::istringstream ISS1(Line); - std::string Marker; - size_t N; - ISS1 >> Marker; - ISS1 >> N; - if (Marker == "STARTED") { - // STARTED FILE_ID FILE_SIZE - if (ExpectedStartMarker != N) - return false; - ISS1 >> Files[ExpectedStartMarker].Size; - LastSeenStartMarker = ExpectedStartMarker; - assert(ExpectedStartMarker < Files.size()); - ExpectedStartMarker++; - } else if (Marker == "DONE") { - // DONE FILE_ID COV1 COV2 COV3 ... - size_t CurrentFileIdx = N; - if (CurrentFileIdx != LastSeenStartMarker) - return false; - LastSeenStartMarker = kInvalidStartMarker; - if (ParseCoverage) { - TmpFeatures.clear(); // use a vector from outer scope to avoid resizes. 
- while (ISS1 >> std::hex >> N) - TmpFeatures.push_back(N); - std::sort(TmpFeatures.begin(), TmpFeatures.end()); - Files[CurrentFileIdx].Features = TmpFeatures; - } - } else { - return false; - } - } - if (LastSeenStartMarker != kInvalidStartMarker) - LastFailure = Files[LastSeenStartMarker].Name; - - FirstNotProcessedFile = ExpectedStartMarker; - return true; -} - -size_t Merger::ApproximateMemoryConsumption() const { - size_t Res = 0; - for (const auto &F: Files) - Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]); - return Res; -} - -// Decides which files need to be merged (add thost to NewFiles). -// Returns the number of new features added. -size_t Merger::Merge(const std::set &InitialFeatures, - std::vector *NewFiles) { - NewFiles->clear(); - assert(NumFilesInFirstCorpus <= Files.size()); - std::set AllFeatures(InitialFeatures); - - // What features are in the initial corpus? - for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { - auto &Cur = Files[i].Features; - AllFeatures.insert(Cur.begin(), Cur.end()); - } - size_t InitialNumFeatures = AllFeatures.size(); - - // Remove all features that we already know from all other inputs. - for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { - auto &Cur = Files[i].Features; - std::vector Tmp; - std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(), - AllFeatures.end(), std::inserter(Tmp, Tmp.begin())); - Cur.swap(Tmp); - } - - // Sort. Give preference to - // * smaller files - // * files with more features. - std::sort(Files.begin() + NumFilesInFirstCorpus, Files.end(), - [&](const MergeFileInfo &a, const MergeFileInfo &b) -> bool { - if (a.Size != b.Size) - return a.Size < b.Size; - return a.Features.size() > b.Features.size(); - }); - - // One greedy pass: add the file's features to AllFeatures. - // If new features were added, add this file to NewFiles. 
- for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { - auto &Cur = Files[i].Features; - // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), - // Files[i].Size, Cur.size()); - size_t OldSize = AllFeatures.size(); - AllFeatures.insert(Cur.begin(), Cur.end()); - if (AllFeatures.size() > OldSize) - NewFiles->push_back(Files[i].Name); - } - return AllFeatures.size() - InitialNumFeatures; -} - -void Merger::PrintSummary(std::ostream &OS) { - for (auto &File : Files) { - OS << std::hex; - OS << File.Name << " size: " << File.Size << " features: "; - for (auto Feature : File.Features) - OS << " " << Feature; - OS << "\n"; - } -} - -std::set Merger::AllFeatures() const { - std::set S; - for (auto &File : Files) - S.insert(File.Features.begin(), File.Features.end()); - return S; -} - -std::set Merger::ParseSummary(std::istream &IS) { - std::string Line, Tmp; - std::set Res; - while (std::getline(IS, Line, '\n')) { - size_t N; - std::istringstream ISS1(Line); - ISS1 >> Tmp; // Name - ISS1 >> Tmp; // size: - assert(Tmp == "size:" && "Corrupt summary file"); - ISS1 >> std::hex; - ISS1 >> N; // File Size - ISS1 >> Tmp; // features: - assert(Tmp == "features:" && "Corrupt summary file"); - while (ISS1 >> std::hex >> N) - Res.insert(N); - } - return Res; -} - -// Inner process. May crash if the target crashes. 
-void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { - Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); - Merger M; - std::ifstream IF(CFPath); - M.ParseOrExit(IF, false); - IF.close(); - if (!M.LastFailure.empty()) - Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n", - M.LastFailure.c_str()); - - Printf("MERGE-INNER: %zd total files;" - " %zd processed earlier; will process %zd files now\n", - M.Files.size(), M.FirstNotProcessedFile, - M.Files.size() - M.FirstNotProcessedFile); - - std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app); - for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { - auto U = FileToVector(M.Files[i].Name); - if (U.size() > MaxInputLen) { - U.resize(MaxInputLen); - U.shrink_to_fit(); - } - std::ostringstream StartedLine; - // Write the pre-run marker. - OF << "STARTED " << std::dec << i << " " << U.size() << "\n"; - OF.flush(); // Flush is important since ExecuteCommand may crash. - // Run. - TPC.ResetMaps(); - ExecuteCallback(U.data(), U.size()); - // Collect coverage. - std::set Features; - TPC.CollectFeatures([&](size_t Feature) -> bool { - Features.insert(Feature); - return true; - }); - // Show stats. - if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) - PrintStats("pulse "); - // Write the post-run marker and the coverage. - OF << "DONE " << i; - for (size_t F : Features) - OF << " " << std::hex << F; - OF << "\n"; - } -} - -// Outer process. Does not call the target code and thus sohuld not fail. 
-void Fuzzer::CrashResistantMerge(const std::vector &Args, - const std::vector &Corpora, - const char *CoverageSummaryInputPathOrNull, - const char *CoverageSummaryOutputPathOrNull) { - if (Corpora.size() <= 1) { - Printf("Merge requires two or more corpus dirs\n"); - return; - } - std::vector AllFiles; - ListFilesInDirRecursive(Corpora[0], nullptr, &AllFiles, /*TopDir*/true); - size_t NumFilesInFirstCorpus = AllFiles.size(); - for (size_t i = 1; i < Corpora.size(); i++) - ListFilesInDirRecursive(Corpora[i], nullptr, &AllFiles, /*TopDir*/true); - Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n", - AllFiles.size(), NumFilesInFirstCorpus); - auto CFPath = DirPlusFile(TmpDir(), - "libFuzzerTemp." + std::to_string(GetPid()) + ".txt"); - // Write the control file. - RemoveFile(CFPath); - std::ofstream ControlFile(CFPath); - ControlFile << AllFiles.size() << "\n"; - ControlFile << NumFilesInFirstCorpus << "\n"; - for (auto &Path: AllFiles) - ControlFile << Path << "\n"; - if (!ControlFile) { - Printf("MERGE-OUTER: failed to write to the control file: %s\n", - CFPath.c_str()); - exit(1); - } - ControlFile.close(); - - // Execute the inner process untill it passes. - // Every inner process should execute at least one input. - auto BaseCmd = SplitBefore("-ignore_remaining_args=1", - CloneArgsWithoutX(Args, "keep-all-flags")); - bool Success = false; - for (size_t i = 1; i <= AllFiles.size(); i++) { - Printf("MERGE-OUTER: attempt %zd\n", i); - auto ExitCode = ExecuteCommand(BaseCmd.first + " -merge_control_file=" + - CFPath + " " + BaseCmd.second); - if (!ExitCode) { - Printf("MERGE-OUTER: succesfull in %zd attempt(s)\n", i); - Success = true; - break; - } - } - if (!Success) { - Printf("MERGE-OUTER: zero succesfull attempts, exiting\n"); - exit(1); - } - // Read the control file and do the merge. 
- Merger M; - std::ifstream IF(CFPath); - IF.seekg(0, IF.end); - Printf("MERGE-OUTER: the control file has %zd bytes\n", (size_t)IF.tellg()); - IF.seekg(0, IF.beg); - M.ParseOrExit(IF, true); - IF.close(); - Printf("MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", - M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); - if (CoverageSummaryOutputPathOrNull) { - Printf("MERGE-OUTER: writing coverage summary for %zd files to %s\n", - M.Files.size(), CoverageSummaryOutputPathOrNull); - std::ofstream SummaryOut(CoverageSummaryOutputPathOrNull); - M.PrintSummary(SummaryOut); - } - std::vector NewFiles; - std::set InitialFeatures; - if (CoverageSummaryInputPathOrNull) { - std::ifstream SummaryIn(CoverageSummaryInputPathOrNull); - InitialFeatures = M.ParseSummary(SummaryIn); - Printf("MERGE-OUTER: coverage summary loaded from %s, %zd features found\n", - CoverageSummaryInputPathOrNull, InitialFeatures.size()); - } - size_t NumNewFeatures = M.Merge(InitialFeatures, &NewFiles); - Printf("MERGE-OUTER: %zd new files with %zd new features added\n", - NewFiles.size(), NumNewFeatures); - for (auto &F: NewFiles) - WriteToOutputCorpus(FileToVector(F)); - // We are done, delete the control file. - RemoveFile(CFPath); -} - -} // namespace fuzzer diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMerge.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMerge.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMerge.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMerge.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,80 +0,0 @@ -//===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Merging Corpora. 
-// -// The task: -// Take the existing corpus (possibly empty) and merge new inputs into -// it so that only inputs with new coverage ('features') are added. -// The process should tolerate the crashes, OOMs, leaks, etc. -// -// Algorithm: -// The outter process collects the set of files and writes their names -// into a temporary "control" file, then repeatedly launches the inner -// process until all inputs are processed. -// The outer process does not actually execute the target code. -// -// The inner process reads the control file and sees a) list of all the inputs -// and b) the last processed input. Then it starts processing the inputs one -// by one. Before processing every input it writes one line to control file: -// STARTED INPUT_ID INPUT_SIZE -// After processing an input it write another line: -// DONE INPUT_ID Feature1 Feature2 Feature3 ... -// If a crash happens while processing an input the last line in the control -// file will be "STARTED INPUT_ID" and so the next process will know -// where to resume. -// -// Once all inputs are processed by the innner process(es) the outer process -// reads the control files and does the merge based entirely on the contents -// of control file. -// It uses a single pass greedy algorithm choosing first the smallest inputs -// within the same size the inputs that have more new features. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MERGE_H -#define LLVM_FUZZER_MERGE_H - -#include "FuzzerDefs.h" - -#include -#include -#include -#include - -namespace fuzzer { - -struct MergeFileInfo { - std::string Name; - size_t Size = 0; - std::vector Features; -}; - -struct Merger { - std::vector Files; - size_t NumFilesInFirstCorpus = 0; - size_t FirstNotProcessedFile = 0; - std::string LastFailure; - - bool Parse(std::istream &IS, bool ParseCoverage); - bool Parse(const std::string &Str, bool ParseCoverage); - void ParseOrExit(std::istream &IS, bool ParseCoverage); - void PrintSummary(std::ostream &OS); - std::set ParseSummary(std::istream &IS); - size_t Merge(const std::set &InitialFeatures, - std::vector *NewFiles); - size_t Merge(std::vector *NewFiles) { - return Merge(std::set{}, NewFiles); - } - size_t ApproximateMemoryConsumption() const; - std::set AllFeatures() const; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_MERGE_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMutate.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMutate.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMutate.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMutate.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,533 +0,0 @@ -//===- FuzzerMutate.cpp - Mutate a test input -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Mutate a test input. 
-//===----------------------------------------------------------------------===// - -#include "FuzzerMutate.h" -#include "FuzzerCorpus.h" -#include "FuzzerDefs.h" -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include "FuzzerOptions.h" - -namespace fuzzer { - -const size_t Dictionary::kMaxDictSize; - -static void PrintASCII(const Word &W, const char *PrintAfter) { - PrintASCII(W.data(), W.size(), PrintAfter); -} - -MutationDispatcher::MutationDispatcher(Random &Rand, - const FuzzingOptions &Options) - : Rand(Rand), Options(Options) { - DefaultMutators.insert( - DefaultMutators.begin(), - { - {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"}, - {&MutationDispatcher::Mutate_InsertByte, "InsertByte"}, - {&MutationDispatcher::Mutate_InsertRepeatedBytes, - "InsertRepeatedBytes"}, - {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}, - {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}, - {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}, - {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}, - {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"}, - {&MutationDispatcher::Mutate_CopyPart, "CopyPart"}, - {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, - {&MutationDispatcher::Mutate_AddWordFromManualDictionary, - "ManualDict"}, - {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, - "PersAutoDict"}, - }); - if(Options.UseCmp) - DefaultMutators.push_back( - {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"}); - - if (EF->LLVMFuzzerCustomMutator) - Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"}); - else - Mutators = DefaultMutators; - - if (EF->LLVMFuzzerCustomCrossOver) - Mutators.push_back( - {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"}); -} - -static char RandCh(Random &Rand) { - if (Rand.RandBool()) return Rand(256); - const char *Special = "!*'();:@&=+$,/?%#[]012Az-`~.\xff\x00"; - return Special[Rand(sizeof(Special) - 1)]; -} - -size_t 
MutationDispatcher::Mutate_Custom(uint8_t *Data, size_t Size, - size_t MaxSize) { - return EF->LLVMFuzzerCustomMutator(Data, Size, MaxSize, Rand.Rand()); -} - -size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (!Corpus || Corpus->size() < 2 || Size == 0) - return 0; - size_t Idx = Rand(Corpus->size()); - const Unit &Other = (*Corpus)[Idx]; - if (Other.empty()) - return 0; - CustomCrossOverInPlaceHere.resize(MaxSize); - auto &U = CustomCrossOverInPlaceHere; - size_t NewSize = EF->LLVMFuzzerCustomCrossOver( - Data, Size, Other.data(), Other.size(), U.data(), U.size(), Rand.Rand()); - if (!NewSize) - return 0; - assert(NewSize <= MaxSize && "CustomCrossOver returned overisized unit"); - memcpy(Data, U.data(), NewSize); - return NewSize; -} - -size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize || Size == 0) return 0; - size_t ShuffleAmount = - Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. - size_t ShuffleStart = Rand(Size - ShuffleAmount); - assert(ShuffleStart + ShuffleAmount <= Size); - std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand); - return Size; -} - -size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size <= 1) return 0; - size_t N = Rand(Size / 2) + 1; - assert(N < Size); - size_t Idx = Rand(Size - N + 1); - // Erase Data[Idx:Idx+N]. - memmove(Data + Idx, Data + Idx + N, Size - Idx - N); - // Printf("Erase: %zd %zd => %zd; Idx %zd\n", N, Size, Size - N, Idx); - return Size - N; -} - -size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size >= MaxSize) return 0; - size_t Idx = Rand(Size + 1); - // Insert new value at Data[Idx]. 
- memmove(Data + Idx + 1, Data + Idx, Size - Idx); - Data[Idx] = RandCh(Rand); - return Size + 1; -} - -size_t MutationDispatcher::Mutate_InsertRepeatedBytes(uint8_t *Data, - size_t Size, - size_t MaxSize) { - const size_t kMinBytesToInsert = 3; - if (Size + kMinBytesToInsert >= MaxSize) return 0; - size_t MaxBytesToInsert = std::min(MaxSize - Size, (size_t)128); - size_t N = Rand(MaxBytesToInsert - kMinBytesToInsert + 1) + kMinBytesToInsert; - assert(Size + N <= MaxSize && N); - size_t Idx = Rand(Size + 1); - // Insert new values at Data[Idx]. - memmove(Data + Idx + N, Data + Idx, Size - Idx); - // Give preference to 0x00 and 0xff. - uint8_t Byte = Rand.RandBool() ? Rand(256) : (Rand.RandBool() ? 0 : 255); - for (size_t i = 0; i < N; i++) - Data[Idx + i] = Byte; - return Size + N; -} - -size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t Idx = Rand(Size); - Data[Idx] = RandCh(Rand); - return Size; -} - -size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t Idx = Rand(Size); - Data[Idx] ^= 1 << Rand(8); - return Size; -} - -size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data, - size_t Size, - size_t MaxSize) { - return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize); -} - -size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size, - size_t MaxSize, - DictionaryEntry &DE) { - const Word &W = DE.GetW(); - bool UsePositionHint = DE.HasPositionHint() && - DE.GetPositionHint() + W.size() < Size && - Rand.RandBool(); - if (Rand.RandBool()) { // Insert W. - if (Size + W.size() > MaxSize) return 0; - size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1); - memmove(Data + Idx + W.size(), Data + Idx, Size - Idx); - memcpy(Data + Idx, W.data(), W.size()); - Size += W.size(); - } else { // Overwrite some bytes with W. 
- if (W.size() > Size) return 0; - size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size - W.size()); - memcpy(Data + Idx, W.data(), W.size()); - } - return Size; -} - -// Somewhere in the past we have observed a comparison instructions -// with arguments Arg1 Arg2. This function tries to guess a dictionary -// entry that will satisfy that comparison. -// It first tries to find one of the arguments (possibly swapped) in the -// input and if it succeeds it creates a DE with a position hint. -// Otherwise it creates a DE with one of the arguments w/o a position hint. -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - const void *Arg1, const void *Arg2, - const void *Arg1Mutation, const void *Arg2Mutation, - size_t ArgSize, const uint8_t *Data, - size_t Size) { - ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str; - bool HandleFirst = Rand.RandBool(); - const void *ExistingBytes, *DesiredBytes; - Word W; - const uint8_t *End = Data + Size; - for (int Arg = 0; Arg < 2; Arg++) { - ExistingBytes = HandleFirst ? Arg1 : Arg2; - DesiredBytes = HandleFirst ? 
Arg2Mutation : Arg1Mutation; - HandleFirst = !HandleFirst; - W.Set(reinterpret_cast(DesiredBytes), ArgSize); - const size_t kMaxNumPositions = 8; - size_t Positions[kMaxNumPositions]; - size_t NumPositions = 0; - for (const uint8_t *Cur = Data; - Cur < End && NumPositions < kMaxNumPositions; Cur++) { - Cur = - (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); - if (!Cur) break; - Positions[NumPositions++] = Cur - Data; - } - if (!NumPositions) continue; - return DictionaryEntry(W, Positions[Rand(NumPositions)]); - } - DictionaryEntry DE(W); - return DE; -} - - -template -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - T Arg1, T Arg2, const uint8_t *Data, size_t Size) { - if (Rand.RandBool()) Arg1 = Bswap(Arg1); - if (Rand.RandBool()) Arg2 = Bswap(Arg2); - T Arg1Mutation = Arg1 + Rand(-1, 1); - T Arg2Mutation = Arg2 + Rand(-1, 1); - return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation, - sizeof(Arg1), Data, Size); -} - -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - const Word &Arg1, const Word &Arg2, const uint8_t *Data, size_t Size) { - return MakeDictionaryEntryFromCMP(Arg1.data(), Arg2.data(), Arg1.data(), - Arg2.data(), Arg1.size(), Data, Size); -} - -size_t MutationDispatcher::Mutate_AddWordFromTORC( - uint8_t *Data, size_t Size, size_t MaxSize) { - Word W; - DictionaryEntry DE; - switch (Rand(4)) { - case 0: { - auto X = TPC.TORC8.Get(Rand.Rand()); - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 1: { - auto X = TPC.TORC4.Get(Rand.Rand()); - if ((X.A >> 16) == 0 && (X.B >> 16) == 0 && Rand.RandBool()) - DE = MakeDictionaryEntryFromCMP((uint16_t)X.A, (uint16_t)X.B, Data, Size); - else - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 2: { - auto X = TPC.TORCW.Get(Rand.Rand()); - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 3: if (Options.UseMemmem) { - auto X = TPC.MMT.Get(Rand.Rand()); - DE = 
DictionaryEntry(X); - } break; - default: - assert(0); - } - if (!DE.GetW().size()) return 0; - Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); - if (!Size) return 0; - DictionaryEntry &DERef = - CmpDictionaryEntriesDeque[CmpDictionaryEntriesDequeIdx++ % - kCmpDictionaryEntriesDequeSize]; - DERef = DE; - CurrentDictionaryEntrySequence.push_back(&DERef); - return Size; -} - -size_t MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary( - uint8_t *Data, size_t Size, size_t MaxSize) { - return AddWordFromDictionary(PersistentAutoDictionary, Data, Size, MaxSize); -} - -size_t MutationDispatcher::AddWordFromDictionary(Dictionary &D, uint8_t *Data, - size_t Size, size_t MaxSize) { - if (Size > MaxSize) return 0; - if (D.empty()) return 0; - DictionaryEntry &DE = D[Rand(D.size())]; - Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); - if (!Size) return 0; - DE.IncUseCount(); - CurrentDictionaryEntrySequence.push_back(&DE); - return Size; -} - -// Overwrites part of To[0,ToSize) with a part of From[0,FromSize). -// Returns ToSize. -size_t MutationDispatcher::CopyPartOf(const uint8_t *From, size_t FromSize, - uint8_t *To, size_t ToSize) { - // Copy From[FromBeg, FromBeg + CopySize) into To[ToBeg, ToBeg + CopySize). - size_t ToBeg = Rand(ToSize); - size_t CopySize = Rand(ToSize - ToBeg) + 1; - assert(ToBeg + CopySize <= ToSize); - CopySize = std::min(CopySize, FromSize); - size_t FromBeg = Rand(FromSize - CopySize + 1); - assert(FromBeg + CopySize <= FromSize); - memmove(To + ToBeg, From + FromBeg, CopySize); - return ToSize; -} - -// Inserts part of From[0,ToSize) into To. -// Returns new size of To on success or 0 on failure. 
-size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize, - uint8_t *To, size_t ToSize, - size_t MaxToSize) { - if (ToSize >= MaxToSize) return 0; - size_t AvailableSpace = MaxToSize - ToSize; - size_t MaxCopySize = std::min(AvailableSpace, FromSize); - size_t CopySize = Rand(MaxCopySize) + 1; - size_t FromBeg = Rand(FromSize - CopySize + 1); - assert(FromBeg + CopySize <= FromSize); - size_t ToInsertPos = Rand(ToSize + 1); - assert(ToInsertPos + CopySize <= MaxToSize); - size_t TailSize = ToSize - ToInsertPos; - if (To == From) { - MutateInPlaceHere.resize(MaxToSize); - memcpy(MutateInPlaceHere.data(), From + FromBeg, CopySize); - memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); - memmove(To + ToInsertPos, MutateInPlaceHere.data(), CopySize); - } else { - memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); - memmove(To + ToInsertPos, From + FromBeg, CopySize); - } - return ToSize + CopySize; -} - -size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize || Size == 0) return 0; - if (Rand.RandBool()) - return CopyPartOf(Data, Size, Data, Size); - else - return InsertPartOf(Data, Size, Data, Size, MaxSize); -} - -size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t B = Rand(Size); - while (B < Size && !isdigit(Data[B])) B++; - if (B == Size) return 0; - size_t E = B; - while (E < Size && isdigit(Data[E])) E++; - assert(B < E); - // now we have digits in [B, E). - // strtol and friends don't accept non-zero-teminated data, parse it manually. - uint64_t Val = Data[B] - '0'; - for (size_t i = B + 1; i < E; i++) - Val = Val * 10 + Data[i] - '0'; - - // Mutate the integer value. 
- switch(Rand(5)) { - case 0: Val++; break; - case 1: Val--; break; - case 2: Val /= 2; break; - case 3: Val *= 2; break; - case 4: Val = Rand(Val * Val); break; - default: assert(0); - } - // Just replace the bytes with the new ones, don't bother moving bytes. - for (size_t i = B; i < E; i++) { - size_t Idx = E + B - i - 1; - assert(Idx >= B && Idx < E); - Data[Idx] = (Val % 10) + '0'; - Val /= 10; - } - return Size; -} - -template -size_t ChangeBinaryInteger(uint8_t *Data, size_t Size, Random &Rand) { - if (Size < sizeof(T)) return 0; - size_t Off = Rand(Size - sizeof(T) + 1); - assert(Off + sizeof(T) <= Size); - T Val; - if (Off < 64 && !Rand(4)) { - Val = Size; - if (Rand.RandBool()) - Val = Bswap(Val); - } else { - memcpy(&Val, Data + Off, sizeof(Val)); - T Add = Rand(21); - Add -= 10; - if (Rand.RandBool()) - Val = Bswap(T(Bswap(Val) + Add)); // Add assuming different endiannes. - else - Val = Val + Add; // Add assuming current endiannes. - if (Add == 0 || Rand.RandBool()) // Maybe negate. 
- Val = -Val; - } - memcpy(Data + Off, &Val, sizeof(Val)); - return Size; -} - -size_t MutationDispatcher::Mutate_ChangeBinaryInteger(uint8_t *Data, - size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - switch (Rand(4)) { - case 3: return ChangeBinaryInteger(Data, Size, Rand); - case 2: return ChangeBinaryInteger(Data, Size, Rand); - case 1: return ChangeBinaryInteger(Data, Size, Rand); - case 0: return ChangeBinaryInteger(Data, Size, Rand); - default: assert(0); - } - return 0; -} - -size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - if (!Corpus || Corpus->size() < 2 || Size == 0) return 0; - size_t Idx = Rand(Corpus->size()); - const Unit &O = (*Corpus)[Idx]; - if (O.empty()) return 0; - MutateInPlaceHere.resize(MaxSize); - auto &U = MutateInPlaceHere; - size_t NewSize = 0; - switch(Rand(3)) { - case 0: - NewSize = CrossOver(Data, Size, O.data(), O.size(), U.data(), U.size()); - break; - case 1: - NewSize = InsertPartOf(O.data(), O.size(), U.data(), U.size(), MaxSize); - if (!NewSize) - NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); - break; - case 2: - NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); - break; - default: assert(0); - } - assert(NewSize > 0 && "CrossOver returned empty unit"); - assert(NewSize <= MaxSize && "CrossOver returned overisized unit"); - memcpy(Data, U.data(), NewSize); - return NewSize; -} - -void MutationDispatcher::StartMutationSequence() { - CurrentMutatorSequence.clear(); - CurrentDictionaryEntrySequence.clear(); -} - -// Copy successful dictionary entries to PersistentAutoDictionary. -void MutationDispatcher::RecordSuccessfulMutationSequence() { - for (auto DE : CurrentDictionaryEntrySequence) { - // PersistentAutoDictionary.AddWithSuccessCountOne(DE); - DE->IncSuccessCount(); - assert(DE->GetW().size()); - // Linear search is fine here as this happens seldom. 
- if (!PersistentAutoDictionary.ContainsWord(DE->GetW())) - PersistentAutoDictionary.push_back({DE->GetW(), 1}); - } -} - -void MutationDispatcher::PrintRecommendedDictionary() { - std::vector V; - for (auto &DE : PersistentAutoDictionary) - if (!ManualDictionary.ContainsWord(DE.GetW())) - V.push_back(DE); - if (V.empty()) return; - Printf("###### Recommended dictionary. ######\n"); - for (auto &DE: V) { - assert(DE.GetW().size()); - Printf("\""); - PrintASCII(DE.GetW(), "\""); - Printf(" # Uses: %zd\n", DE.GetUseCount()); - } - Printf("###### End of recommended dictionary. ######\n"); -} - -void MutationDispatcher::PrintMutationSequence() { - Printf("MS: %zd ", CurrentMutatorSequence.size()); - for (auto M : CurrentMutatorSequence) - Printf("%s-", M.Name); - if (!CurrentDictionaryEntrySequence.empty()) { - Printf(" DE: "); - for (auto DE : CurrentDictionaryEntrySequence) { - Printf("\""); - PrintASCII(DE->GetW(), "\"-"); - } - } -} - -size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { - return MutateImpl(Data, Size, MaxSize, Mutators); -} - -size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size, - size_t MaxSize) { - return MutateImpl(Data, Size, MaxSize, DefaultMutators); -} - -// Mutates Data in place, returns new size. -size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, - size_t MaxSize, - const std::vector &Mutators) { - assert(MaxSize > 0); - // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), - // in which case they will return 0. - // Try several times before returning un-mutated data. - for (int Iter = 0; Iter < 100; Iter++) { - auto M = Mutators[Rand(Mutators.size())]; - size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); - if (NewSize && NewSize <= MaxSize) { - if (Options.OnlyASCII) - ToASCII(Data, NewSize); - CurrentMutatorSequence.push_back(M); - return NewSize; - } - } - *Data = ' '; - return 1; // Fallback, should not happen frequently. 
-} - -void MutationDispatcher::AddWordToManualDictionary(const Word &W) { - ManualDictionary.push_back( - {W, std::numeric_limits::max()}); -} - -} // namespace fuzzer diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMutate.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMutate.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerMutate.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerMutate.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,150 +0,0 @@ -//===- FuzzerMutate.h - Internal header for the Fuzzer ----------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::MutationDispatcher -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MUTATE_H -#define LLVM_FUZZER_MUTATE_H - -#include "FuzzerDefs.h" -#include "FuzzerDictionary.h" -#include "FuzzerOptions.h" -#include "FuzzerRandom.h" - -namespace fuzzer { - -class MutationDispatcher { -public: - MutationDispatcher(Random &Rand, const FuzzingOptions &Options); - ~MutationDispatcher() {} - /// Indicate that we are about to start a new sequence of mutations. - void StartMutationSequence(); - /// Print the current sequence of mutations. - void PrintMutationSequence(); - /// Indicate that the current sequence of mutations was successfull. - void RecordSuccessfulMutationSequence(); - /// Mutates data by invoking user-provided mutator. - size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by invoking user-provided crossover. - size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by shuffling bytes. 
- size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by erasing bytes. - size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by inserting a byte. - size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by inserting several repeated bytes. - size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by chanding one byte. - size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by chanding one bit. - size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by copying/inserting a part of data into a different place. - size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Mutates data by adding a word from the manual dictionary. - size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); - - /// Mutates data by adding a word from the TORC. - size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Mutates data by adding a word from the persistent automatic dictionary. - size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); - - /// Tries to find an ASCII integer in Data, changes it to another ASCII int. - size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); - /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways. - size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize); - - /// CrossOver Data with some other element of the corpus. - size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Applies one of the configured mutations. - /// Returns the new size of data which could be up to MaxSize. - size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); - /// Applies one of the default mutations. Provided as a service - /// to mutation authors. 
- size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Creates a cross-over of two pieces of Data, returns its size. - size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, - size_t Size2, uint8_t *Out, size_t MaxOutSize); - - void AddWordToManualDictionary(const Word &W); - - void PrintRecommendedDictionary(); - - void SetCorpus(const InputCorpus *Corpus) { this->Corpus = Corpus; } - - Random &GetRand() { return Rand; } - -private: - - struct Mutator { - size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); - const char *Name; - }; - - size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, - size_t MaxSize); - size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, - const std::vector &Mutators); - - size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, - size_t ToSize, size_t MaxToSize); - size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, - size_t ToSize); - size_t ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize, - DictionaryEntry &DE); - - template - DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2, - const uint8_t *Data, size_t Size); - DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2, - const uint8_t *Data, size_t Size); - DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2, - const void *Arg1Mutation, - const void *Arg2Mutation, - size_t ArgSize, - const uint8_t *Data, size_t Size); - - Random &Rand; - const FuzzingOptions Options; - - // Dictionary provided by the user via -dict=DICT_FILE. - Dictionary ManualDictionary; - // Temporary dictionary modified by the fuzzer itself, - // recreated periodically. - Dictionary TempAutoDictionary; - // Persistent dictionary modified by the fuzzer, consists of - // entries that led to successfull discoveries in the past mutations. 
- Dictionary PersistentAutoDictionary; - - std::vector CurrentMutatorSequence; - std::vector CurrentDictionaryEntrySequence; - - static const size_t kCmpDictionaryEntriesDequeSize = 16; - DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize]; - size_t CmpDictionaryEntriesDequeIdx = 0; - - const InputCorpus *Corpus = nullptr; - std::vector MutateInPlaceHere; - // CustomCrossOver needs its own buffer as a custom implementation may call - // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere. - std::vector CustomCrossOverInPlaceHere; - - std::vector Mutators; - std::vector DefaultMutators; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_MUTATE_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerOptions.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerOptions.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerOptions.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerOptions.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,68 +0,0 @@ -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// fuzzer::FuzzingOptions -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_OPTIONS_H -#define LLVM_FUZZER_OPTIONS_H - -#include "FuzzerDefs.h" - -namespace fuzzer { - -struct FuzzingOptions { - int Verbosity = 1; - size_t MaxLen = 0; - bool ExperimentalLenControl = false; - int UnitTimeoutSec = 300; - int TimeoutExitCode = 77; - int ErrorExitCode = 77; - int MaxTotalTimeSec = 0; - int RssLimitMb = 0; - bool DoCrossOver = true; - int MutateDepth = 5; - bool UseCounters = false; - bool UseIndirCalls = true; - bool UseMemmem = true; - bool UseCmp = false; - bool UseValueProfile = false; - bool Shrink = false; - bool ReduceInputs = false; - int ReloadIntervalSec = 1; - bool ShuffleAtStartUp = true; - bool PreferSmall = true; - size_t MaxNumberOfRuns = -1L; - int ReportSlowUnits = 10; - bool OnlyASCII = false; - std::string OutputCorpus; - std::string ArtifactPrefix = "./"; - std::string ExactArtifactPath; - std::string ExitOnSrcPos; - std::string ExitOnItem; - bool SaveArtifacts = true; - bool PrintNEW = true; // Print a status line when new units are found; - bool PrintNewCovPcs = false; - bool PrintFinalStats = false; - bool PrintCorpusStats = false; - bool PrintCoverage = false; - bool DumpCoverage = false; - bool DetectLeaks = true; - int TraceMalloc = 0; - bool HandleAbrt = false; - bool HandleBus = false; - bool HandleFpe = false; - bool HandleIll = false; - bool HandleInt = false; - bool HandleSegv = false; - bool HandleTerm = false; - bool HandleXfsz = false; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_OPTIONS_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerRandom.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerRandom.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerRandom.h 2017-10-15 17:43:15.000000000 +0000 +++ 
llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerRandom.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -//===- FuzzerRandom.h - Internal header for the Fuzzer ----------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::Random -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_RANDOM_H -#define LLVM_FUZZER_RANDOM_H - -#include - -namespace fuzzer { -class Random : public std::mt19937 { - public: - Random(unsigned int seed) : std::mt19937(seed) {} - result_type operator()() { return this->std::mt19937::operator()(); } - size_t Rand() { return this->operator()(); } - size_t RandBool() { return Rand() % 2; } - size_t operator()(size_t n) { return n ? Rand() % n : 0; } - intptr_t operator()(intptr_t From, intptr_t To) { - assert(From < To); - intptr_t RangeSize = To - From + 1; - return operator()(RangeSize) + From; - } -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_RANDOM_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerSHA1.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerSHA1.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerSHA1.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerSHA1.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,222 +0,0 @@ -//===- FuzzerSHA1.h - Private copy of the SHA1 implementation ---*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// This code is taken from public domain -// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c) -// and modified by adding anonymous namespace, adding an interface -// function fuzzer::ComputeSHA1() and removing unnecessary code. -// -// lib/Fuzzer can not use SHA1 implementation from openssl because -// openssl may not be available and because we may be fuzzing openssl itself. -// For the same reason we do not want to depend on SHA1 from LLVM tree. -//===----------------------------------------------------------------------===// - -#include "FuzzerSHA1.h" -#include "FuzzerDefs.h" - -/* This code is public-domain - it is based on libcrypt - * placed in the public domain by Wei Dai and other contributors. - */ - -#include -#include -#include -#include - -namespace { // Added for LibFuzzer - -#ifdef __BIG_ENDIAN__ -# define SHA_BIG_ENDIAN -#elif defined __LITTLE_ENDIAN__ -/* override */ -#elif defined __BYTE_ORDER -# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define SHA_BIG_ENDIAN -# endif -#else // ! 
defined __LITTLE_ENDIAN__ -# include // machine/endian.h -# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define SHA_BIG_ENDIAN -# endif -#endif - - -/* header */ - -#define HASH_LENGTH 20 -#define BLOCK_LENGTH 64 - -typedef struct sha1nfo { - uint32_t buffer[BLOCK_LENGTH/4]; - uint32_t state[HASH_LENGTH/4]; - uint32_t byteCount; - uint8_t bufferOffset; - uint8_t keyBuffer[BLOCK_LENGTH]; - uint8_t innerHash[HASH_LENGTH]; -} sha1nfo; - -/* public API - prototypes - TODO: doxygen*/ - -/** - */ -void sha1_init(sha1nfo *s); -/** - */ -void sha1_writebyte(sha1nfo *s, uint8_t data); -/** - */ -void sha1_write(sha1nfo *s, const char *data, size_t len); -/** - */ -uint8_t* sha1_result(sha1nfo *s); - - -/* code */ -#define SHA1_K0 0x5a827999 -#define SHA1_K20 0x6ed9eba1 -#define SHA1_K40 0x8f1bbcdc -#define SHA1_K60 0xca62c1d6 - -void sha1_init(sha1nfo *s) { - s->state[0] = 0x67452301; - s->state[1] = 0xefcdab89; - s->state[2] = 0x98badcfe; - s->state[3] = 0x10325476; - s->state[4] = 0xc3d2e1f0; - s->byteCount = 0; - s->bufferOffset = 0; -} - -uint32_t sha1_rol32(uint32_t number, uint8_t bits) { - return ((number << bits) | (number >> (32-bits))); -} - -void sha1_hashBlock(sha1nfo *s) { - uint8_t i; - uint32_t a,b,c,d,e,t; - - a=s->state[0]; - b=s->state[1]; - c=s->state[2]; - d=s->state[3]; - e=s->state[4]; - for (i=0; i<80; i++) { - if (i>=16) { - t = s->buffer[(i+13)&15] ^ s->buffer[(i+8)&15] ^ s->buffer[(i+2)&15] ^ s->buffer[i&15]; - s->buffer[i&15] = sha1_rol32(t,1); - } - if (i<20) { - t = (d ^ (b & (c ^ d))) + SHA1_K0; - } else if (i<40) { - t = (b ^ c ^ d) + SHA1_K20; - } else if (i<60) { - t = ((b & c) | (d & (b | c))) + SHA1_K40; - } else { - t = (b ^ c ^ d) + SHA1_K60; - } - t+=sha1_rol32(a,5) + e + s->buffer[i&15]; - e=d; - d=c; - c=sha1_rol32(b,30); - b=a; - a=t; - } - s->state[0] += a; - s->state[1] += b; - s->state[2] += c; - s->state[3] += d; - s->state[4] += e; -} - -void sha1_addUncounted(sha1nfo *s, uint8_t data) { - uint8_t * const b = (uint8_t*) 
s->buffer; -#ifdef SHA_BIG_ENDIAN - b[s->bufferOffset] = data; -#else - b[s->bufferOffset ^ 3] = data; -#endif - s->bufferOffset++; - if (s->bufferOffset == BLOCK_LENGTH) { - sha1_hashBlock(s); - s->bufferOffset = 0; - } -} - -void sha1_writebyte(sha1nfo *s, uint8_t data) { - ++s->byteCount; - sha1_addUncounted(s, data); -} - -void sha1_write(sha1nfo *s, const char *data, size_t len) { - for (;len--;) sha1_writebyte(s, (uint8_t) *data++); -} - -void sha1_pad(sha1nfo *s) { - // Implement SHA-1 padding (fips180-2 §5.1.1) - - // Pad with 0x80 followed by 0x00 until the end of the block - sha1_addUncounted(s, 0x80); - while (s->bufferOffset != 56) sha1_addUncounted(s, 0x00); - - // Append length in the last 8 bytes - sha1_addUncounted(s, 0); // We're only using 32 bit lengths - sha1_addUncounted(s, 0); // But SHA-1 supports 64 bit lengths - sha1_addUncounted(s, 0); // So zero pad the top bits - sha1_addUncounted(s, s->byteCount >> 29); // Shifting to multiply by 8 - sha1_addUncounted(s, s->byteCount >> 21); // as SHA-1 supports bitstreams as well as - sha1_addUncounted(s, s->byteCount >> 13); // byte. 
- sha1_addUncounted(s, s->byteCount >> 5); - sha1_addUncounted(s, s->byteCount << 3); -} - -uint8_t* sha1_result(sha1nfo *s) { - // Pad to complete the last block - sha1_pad(s); - -#ifndef SHA_BIG_ENDIAN - // Swap byte order back - int i; - for (i=0; i<5; i++) { - s->state[i]= - (((s->state[i])<<24)& 0xff000000) - | (((s->state[i])<<8) & 0x00ff0000) - | (((s->state[i])>>8) & 0x0000ff00) - | (((s->state[i])>>24)& 0x000000ff); - } -#endif - - // Return pointer to hash (20 characters) - return (uint8_t*) s->state; -} - -} // namespace; Added for LibFuzzer - -namespace fuzzer { - -// The rest is added for LibFuzzer -void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out) { - sha1nfo s; - sha1_init(&s); - sha1_write(&s, (const char*)Data, Len); - memcpy(Out, sha1_result(&s), HASH_LENGTH); -} - -std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]) { - std::stringstream SS; - for (int i = 0; i < kSHA1NumBytes; i++) - SS << std::hex << std::setfill('0') << std::setw(2) << (unsigned)Sha1[i]; - return SS.str(); -} - -std::string Hash(const Unit &U) { - uint8_t Hash[kSHA1NumBytes]; - ComputeSHA1(U.data(), U.size(), Hash); - return Sha1ToString(Hash); -} - -} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerSHA1.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerSHA1.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerSHA1.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerSHA1.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -//===- FuzzerSHA1.h - Internal header for the SHA1 utils --------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// SHA1 utils. 
-//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_SHA1_H -#define LLVM_FUZZER_SHA1_H - -#include "FuzzerDefs.h" -#include -#include - -namespace fuzzer { - -// Private copy of SHA1 implementation. -static const int kSHA1NumBytes = 20; - -// Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. -void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); - -std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]); - -std::string Hash(const Unit &U); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_SHA1_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerShmem.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerShmem.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerShmem.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerShmem.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,69 +0,0 @@ -//===- FuzzerShmem.h - shared memory interface ------------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// SharedMemoryRegion -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_SHMEM_H -#define LLVM_FUZZER_SHMEM_H - -#include -#include -#include - -#include "FuzzerDefs.h" - -namespace fuzzer { - -class SharedMemoryRegion { - public: - bool Create(const char *Name); - bool Open(const char *Name); - bool Destroy(const char *Name); - uint8_t *GetData() { return Data; } - void PostServer() {Post(0);} - void WaitServer() {Wait(0);} - void PostClient() {Post(1);} - void WaitClient() {Wait(1);} - - size_t WriteByteArray(const uint8_t *Bytes, size_t N) { - assert(N <= kShmemSize - sizeof(N)); - memcpy(GetData(), &N, sizeof(N)); - memcpy(GetData() + sizeof(N), Bytes, N); - assert(N == ReadByteArraySize()); - return N; - } - size_t ReadByteArraySize() { - size_t Res; - memcpy(&Res, GetData(), sizeof(Res)); - return Res; - } - uint8_t *GetByteArray() { return GetData() + sizeof(size_t); } - - bool IsServer() const { return Data && IAmServer; } - bool IsClient() const { return Data && !IAmServer; } - -private: - - static const size_t kShmemSize = 1 << 22; - bool IAmServer; - std::string Path(const char *Name); - std::string SemName(const char *Name, int Idx); - void Post(int Idx); - void Wait(int Idx); - - bool Map(int fd); - uint8_t *Data = nullptr; - void *Semaphore[2]; -}; - -extern SharedMemoryRegion SMR; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_SHMEM_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerShmemPosix.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerShmemPosix.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerShmemPosix.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerShmemPosix.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,103 +0,0 @@ -//===- FuzzerShmemPosix.cpp - Posix shared memory ---------------*- C++ -* ===// -// -// The 
LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// SharedMemoryRegion -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_POSIX - -#include "FuzzerIO.h" -#include "FuzzerShmem.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -std::string SharedMemoryRegion::Path(const char *Name) { - return DirPlusFile(TmpDir(), Name); -} - -std::string SharedMemoryRegion::SemName(const char *Name, int Idx) { - std::string Res(Name); - return Res + (char)('0' + Idx); -} - -bool SharedMemoryRegion::Map(int fd) { - Data = - (uint8_t *)mmap(0, kShmemSize, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); - if (Data == (uint8_t*)-1) - return false; - return true; -} - -bool SharedMemoryRegion::Create(const char *Name) { - int fd = open(Path(Name).c_str(), O_CREAT | O_RDWR, 0777); - if (fd < 0) return false; - if (ftruncate(fd, kShmemSize) < 0) return false; - if (!Map(fd)) - return false; - for (int i = 0; i < 2; i++) { - sem_unlink(SemName(Name, i).c_str()); - Semaphore[i] = sem_open(SemName(Name, i).c_str(), O_CREAT, 0644, 0); - if (Semaphore[i] == (void *)-1) - return false; - } - IAmServer = true; - return true; -} - -bool SharedMemoryRegion::Open(const char *Name) { - int fd = open(Path(Name).c_str(), O_RDWR); - if (fd < 0) return false; - struct stat stat_res; - if (0 != fstat(fd, &stat_res)) - return false; - assert(stat_res.st_size == kShmemSize); - if (!Map(fd)) - return false; - for (int i = 0; i < 2; i++) { - Semaphore[i] = sem_open(SemName(Name, i).c_str(), 0); - if (Semaphore[i] == (void *)-1) - return false; - } - IAmServer = false; - return true; -} - -bool SharedMemoryRegion::Destroy(const char *Name) { - return 0 == unlink(Path(Name).c_str()); -} - 
-void SharedMemoryRegion::Post(int Idx) { - assert(Idx == 0 || Idx == 1); - sem_post((sem_t*)Semaphore[Idx]); -} - -void SharedMemoryRegion::Wait(int Idx) { - assert(Idx == 0 || Idx == 1); - for (int i = 0; i < 10 && sem_wait((sem_t*)Semaphore[Idx]); i++) { - // sem_wait may fail if interrupted by a signal. - sleep(i); - if (i) - Printf("%s: sem_wait[%d] failed %s\n", i < 9 ? "WARNING" : "ERROR", i, - strerror(errno)); - if (i == 9) abort(); - } -} - -} // namespace fuzzer - -#endif // LIBFUZZER_POSIX diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerShmemWindows.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerShmemWindows.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerShmemWindows.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerShmemWindows.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,64 +0,0 @@ -//===- FuzzerShmemWindows.cpp - Posix shared memory -------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// SharedMemoryRegion -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerIO.h" -#include "FuzzerShmem.h" - -#include -#include -#include -#include - -namespace fuzzer { - -std::string SharedMemoryRegion::Path(const char *Name) { - return DirPlusFile(TmpDir(), Name); -} - -std::string SharedMemoryRegion::SemName(const char *Name, int Idx) { - std::string Res(Name); - return Res + (char)('0' + Idx); -} - -bool SharedMemoryRegion::Map(int fd) { - assert(0 && "UNIMPLEMENTED"); - return false; -} - -bool SharedMemoryRegion::Create(const char *Name) { - assert(0 && "UNIMPLEMENTED"); - return false; -} - -bool SharedMemoryRegion::Open(const char *Name) { - assert(0 && "UNIMPLEMENTED"); - return false; -} - -bool SharedMemoryRegion::Destroy(const char *Name) { - assert(0 && "UNIMPLEMENTED"); - return false; -} - -void SharedMemoryRegion::Post(int Idx) { - assert(0 && "UNIMPLEMENTED"); -} - -void SharedMemoryRegion::Wait(int Idx) { - Semaphore[1] = nullptr; - assert(0 && "UNIMPLEMENTED"); -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerTracePC.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerTracePC.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerTracePC.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerTracePC.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,583 +0,0 @@ -//===- FuzzerTracePC.cpp - PC tracing--------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Trace PCs. 
-// This module implements __sanitizer_cov_trace_pc_guard[_init], -// the callback required for -fsanitize-coverage=trace-pc-guard instrumentation. -// -//===----------------------------------------------------------------------===// - -#include "FuzzerTracePC.h" -#include "FuzzerCorpus.h" -#include "FuzzerDefs.h" -#include "FuzzerDictionary.h" -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include "FuzzerUtil.h" -#include "FuzzerValueBitMap.h" -#include - -// The coverage counters and PCs. -// These are declared as global variables named "__sancov_*" to simplify -// experiments with inlined instrumentation. -alignas(64) ATTRIBUTE_INTERFACE -uint8_t __sancov_trace_pc_guard_8bit_counters[fuzzer::TracePC::kNumPCs]; - -ATTRIBUTE_INTERFACE -uintptr_t __sancov_trace_pc_pcs[fuzzer::TracePC::kNumPCs]; - -// Used by -fsanitize-coverage=stack-depth to track stack depth -ATTRIBUTE_INTERFACE thread_local uintptr_t __sancov_lowest_stack; - -namespace fuzzer { - -TracePC TPC; - -int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr; - -uint8_t *TracePC::Counters() const { - return __sancov_trace_pc_guard_8bit_counters; -} - -uintptr_t *TracePC::PCs() const { - return __sancov_trace_pc_pcs; -} - -size_t TracePC::GetTotalPCCoverage() { - if (ObservedPCs.size()) - return ObservedPCs.size(); - size_t Res = 0; - for (size_t i = 1, N = GetNumPCs(); i < N; i++) - if (PCs()[i]) - Res++; - return Res; -} - - -void TracePC::HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop) { - if (Start == Stop) return; - if (NumModulesWithInline8bitCounters && - ModuleCounters[NumModulesWithInline8bitCounters-1].Start == Start) return; - assert(NumModulesWithInline8bitCounters < - sizeof(ModuleCounters) / sizeof(ModuleCounters[0])); - ModuleCounters[NumModulesWithInline8bitCounters++] = {Start, Stop}; - NumInline8bitCounters += Stop - Start; -} - -void TracePC::HandlePCsInit(const uint8_t *Start, const uint8_t *Stop) { - const uintptr_t *B = reinterpret_cast(Start); - const uintptr_t *E 
= reinterpret_cast(Stop); - if (NumPCTables && ModulePCTable[NumPCTables - 1].Start == B) return; - assert(NumPCTables < sizeof(ModulePCTable) / sizeof(ModulePCTable[0])); - ModulePCTable[NumPCTables++] = {B, E}; - NumPCsInPCTables += E - B; -} - -void TracePC::HandleInit(uint32_t *Start, uint32_t *Stop) { - if (Start == Stop || *Start) return; - assert(NumModules < sizeof(Modules) / sizeof(Modules[0])); - for (uint32_t *P = Start; P < Stop; P++) { - NumGuards++; - if (NumGuards == kNumPCs) { - RawPrint( - "WARNING: The binary has too many instrumented PCs.\n" - " You may want to reduce the size of the binary\n" - " for more efficient fuzzing and precise coverage data\n"); - } - *P = NumGuards % kNumPCs; - } - Modules[NumModules].Start = Start; - Modules[NumModules].Stop = Stop; - NumModules++; -} - -void TracePC::PrintModuleInfo() { - if (NumGuards) { - Printf("INFO: Loaded %zd modules (%zd guards): ", NumModules, NumGuards); - for (size_t i = 0; i < NumModules; i++) - Printf("%zd [%p, %p), ", Modules[i].Stop - Modules[i].Start, - Modules[i].Start, Modules[i].Stop); - Printf("\n"); - } - if (NumModulesWithInline8bitCounters) { - Printf("INFO: Loaded %zd modules (%zd inline 8-bit counters): ", - NumModulesWithInline8bitCounters, NumInline8bitCounters); - for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) - Printf("%zd [%p, %p), ", ModuleCounters[i].Stop - ModuleCounters[i].Start, - ModuleCounters[i].Start, ModuleCounters[i].Stop); - Printf("\n"); - } - if (NumPCTables) { - Printf("INFO: Loaded %zd PC tables (%zd PCs): ", NumPCTables, - NumPCsInPCTables); - for (size_t i = 0; i < NumPCTables; i++) { - Printf("%zd [%p,%p), ", ModulePCTable[i].Stop - ModulePCTable[i].Start, - ModulePCTable[i].Start, ModulePCTable[i].Stop); - } - Printf("\n"); - - if ((NumGuards && NumGuards != NumPCsInPCTables) || - (NumInline8bitCounters && NumInline8bitCounters != NumPCsInPCTables)) { - Printf("ERROR: The size of coverage PC tables does not match the" - " number of 
instrumented PCs. This might be a bug in the compiler," - " please contact the libFuzzer developers.\n"); - _Exit(1); - } - } - if (size_t NumClangCounters = ClangCountersEnd() - ClangCountersBegin()) - Printf("INFO: %zd Clang Coverage Counters\n", NumClangCounters); -} - -ATTRIBUTE_NO_SANITIZE_ALL -void TracePC::HandleCallerCallee(uintptr_t Caller, uintptr_t Callee) { - const uintptr_t kBits = 12; - const uintptr_t kMask = (1 << kBits) - 1; - uintptr_t Idx = (Caller & kMask) | ((Callee & kMask) << kBits); - ValueProfileMap.AddValueModPrime(Idx); -} - -void TracePC::UpdateObservedPCs() { - auto Observe = [&](uintptr_t PC) { - bool Inserted = ObservedPCs.insert(PC).second; - if (Inserted && DoPrintNewPCs) - PrintPC("\tNEW_PC: %p %F %L\n", "\tNEW_PC: %p\n", PC + 1); - }; - if (NumPCsInPCTables) { - if (NumInline8bitCounters == NumPCsInPCTables) { - for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { - uint8_t *Beg = ModuleCounters[i].Start; - size_t Size = ModuleCounters[i].Stop - Beg; - assert(Size == - (size_t)(ModulePCTable[i].Stop - ModulePCTable[i].Start)); - for (size_t j = 0; j < Size; j++) - if (Beg[j]) - Observe(ModulePCTable[i].Start[j]); - } - } else if (NumGuards == NumPCsInPCTables) { - size_t GuardIdx = 1; - for (size_t i = 0; i < NumModules; i++) { - uint32_t *Beg = Modules[i].Start; - size_t Size = Modules[i].Stop - Beg; - assert(Size == - (size_t)(ModulePCTable[i].Stop - ModulePCTable[i].Start)); - for (size_t j = 0; j < Size; j++, GuardIdx++) - if (Counters()[GuardIdx]) - Observe(ModulePCTable[i].Start[j]); - } - } - } - if (size_t NumClangCounters = - ClangCountersEnd() - ClangCountersBegin()) { - auto P = ClangCountersBegin(); - for (size_t Idx = 0; Idx < NumClangCounters; Idx++) - if (P[Idx]) - Observe((uintptr_t)Idx); - } -} - -inline ALWAYS_INLINE uintptr_t GetPreviousInstructionPc(uintptr_t PC) { - // TODO: this implementation is x86 only. - // see sanitizer_common GetPreviousInstructionPc for full implementation. 
- return PC - 1; -} - -inline ALWAYS_INLINE uintptr_t GetNextInstructionPc(uintptr_t PC) { - // TODO: this implementation is x86 only. - // see sanitizer_common GetPreviousInstructionPc for full implementation. - return PC + 1; -} - -static std::string GetModuleName(uintptr_t PC) { - char ModulePathRaw[4096] = ""; // What's PATH_MAX in portable C++? - void *OffsetRaw = nullptr; - if (!EF->__sanitizer_get_module_and_offset_for_pc( - reinterpret_cast(PC), ModulePathRaw, - sizeof(ModulePathRaw), &OffsetRaw)) - return ""; - return ModulePathRaw; -} - -void TracePC::PrintCoverage() { - if (!EF->__sanitizer_symbolize_pc || - !EF->__sanitizer_get_module_and_offset_for_pc) { - Printf("INFO: __sanitizer_symbolize_pc or " - "__sanitizer_get_module_and_offset_for_pc is not available," - " not printing coverage\n"); - return; - } - Printf("COVERAGE:\n"); - std::string LastFunctionName = ""; - std::string LastFileStr = ""; - std::set UncoveredLines; - std::set CoveredLines; - - auto FunctionEndCallback = [&](const std::string &CurrentFunc, - const std::string &CurrentFile) { - if (LastFunctionName != CurrentFunc) { - if (CoveredLines.empty() && !UncoveredLines.empty()) { - Printf("UNCOVERED_FUNC: %s\n", LastFunctionName.c_str()); - } else { - for (auto Line : UncoveredLines) { - if (!CoveredLines.count(Line)) - Printf("UNCOVERED_LINE: %s %s:%zd\n", LastFunctionName.c_str(), - LastFileStr.c_str(), Line); - } - } - - UncoveredLines.clear(); - CoveredLines.clear(); - LastFunctionName = CurrentFunc; - LastFileStr = CurrentFile; - } - }; - - for (size_t i = 0; i < NumPCTables; i++) { - auto &M = ModulePCTable[i]; - assert(M.Start < M.Stop); - auto ModuleName = GetModuleName(*M.Start); - for (auto Ptr = M.Start; Ptr < M.Stop; Ptr++) { - auto PC = *Ptr; - auto VisualizePC = GetNextInstructionPc(PC); - bool IsObserved = ObservedPCs.count(PC); - std::string FileStr = DescribePC("%s", VisualizePC); - if (!IsInterestingCoverageFile(FileStr)) continue; - std::string FunctionStr = 
DescribePC("%F", VisualizePC); - FunctionEndCallback(FunctionStr, FileStr); - std::string LineStr = DescribePC("%l", VisualizePC); - size_t Line = std::stoul(LineStr); - if (IsObserved && CoveredLines.insert(Line).second) - Printf("COVERED: %s %s:%zd\n", FunctionStr.c_str(), FileStr.c_str(), - Line); - else - UncoveredLines.insert(Line); - } - } - FunctionEndCallback("", ""); -} - -void TracePC::DumpCoverage() { - if (EF->__sanitizer_dump_coverage) { - std::vector PCsCopy(GetNumPCs()); - for (size_t i = 0; i < GetNumPCs(); i++) - PCsCopy[i] = PCs()[i] ? GetPreviousInstructionPc(PCs()[i]) : 0; - EF->__sanitizer_dump_coverage(PCsCopy.data(), PCsCopy.size()); - } -} - -// Value profile. -// We keep track of various values that affect control flow. -// These values are inserted into a bit-set-based hash map. -// Every new bit in the map is treated as a new coverage. -// -// For memcmp/strcmp/etc the interesting value is the length of the common -// prefix of the parameters. -// For cmp instructions the interesting value is a XOR of the parameters. -// The interesting value is mixed up with the PC and is then added to the map. - -ATTRIBUTE_NO_SANITIZE_ALL -void TracePC::AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2, - size_t n, bool StopAtZero) { - if (!n) return; - size_t Len = std::min(n, Word::GetMaxSize()); - const uint8_t *A1 = reinterpret_cast(s1); - const uint8_t *A2 = reinterpret_cast(s2); - uint8_t B1[Word::kMaxSize]; - uint8_t B2[Word::kMaxSize]; - // Copy the data into locals in this non-msan-instrumented function - // to avoid msan complaining further. - size_t Hash = 0; // Compute some simple hash of both strings. 
- for (size_t i = 0; i < Len; i++) { - B1[i] = A1[i]; - B2[i] = A2[i]; - size_t T = B1[i]; - Hash ^= (T << 8) | B2[i]; - } - size_t I = 0; - for (; I < Len; I++) - if (B1[I] != B2[I] || (StopAtZero && B1[I] == 0)) - break; - size_t PC = reinterpret_cast(caller_pc); - size_t Idx = (PC & 4095) | (I << 12); - ValueProfileMap.AddValue(Idx); - TORCW.Insert(Idx ^ Hash, Word(B1, Len), Word(B2, Len)); -} - -template -ATTRIBUTE_TARGET_POPCNT ALWAYS_INLINE -ATTRIBUTE_NO_SANITIZE_ALL -void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) { - uint64_t ArgXor = Arg1 ^ Arg2; - uint64_t ArgDistance = __builtin_popcountll(ArgXor) + 1; // [1,65] - uintptr_t Idx = ((PC & 4095) + 1) * ArgDistance; - if (sizeof(T) == 4) - TORC4.Insert(ArgXor, Arg1, Arg2); - else if (sizeof(T) == 8) - TORC8.Insert(ArgXor, Arg1, Arg2); - ValueProfileMap.AddValue(Idx); -} - -static size_t InternalStrnlen(const char *S, size_t MaxLen) { - size_t Len = 0; - for (; Len < MaxLen && S[Len]; Len++) {} - return Len; -} - -// Finds min of (strlen(S1), strlen(S2)). -// Needed bacause one of these strings may actually be non-zero terminated. 
-static size_t InternalStrnlen2(const char *S1, const char *S2) { - size_t Len = 0; - for (; S1[Len] && S2[Len]; Len++) {} - return Len; -} - -void TracePC::ClearInlineCounters() { - for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { - uint8_t *Beg = ModuleCounters[i].Start; - size_t Size = ModuleCounters[i].Stop - Beg; - memset(Beg, 0, Size); - } -} - -void TracePC::RecordInitialStack() { - InitialStack = __sancov_lowest_stack; -} - -uintptr_t TracePC::GetMaxStackOffset() const { - return InitialStack - __sancov_lowest_stack; // Stack grows down -} - -} // namespace fuzzer - -extern "C" { -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -void __sanitizer_cov_trace_pc_guard(uint32_t *Guard) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - uint32_t Idx = *Guard; - __sancov_trace_pc_pcs[Idx] = PC; - __sancov_trace_pc_guard_8bit_counters[Idx]++; -} - -// Best-effort support for -fsanitize-coverage=trace-pc, which is available -// in both Clang and GCC. -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -void __sanitizer_cov_trace_pc() { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - uintptr_t Idx = PC & (((uintptr_t)1 << fuzzer::TracePC::kTracePcBits) - 1); - __sancov_trace_pc_pcs[Idx] = PC; - __sancov_trace_pc_guard_8bit_counters[Idx]++; -} - -ATTRIBUTE_INTERFACE -void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) { - fuzzer::TPC.HandleInit(Start, Stop); -} - -ATTRIBUTE_INTERFACE -void __sanitizer_cov_8bit_counters_init(uint8_t *Start, uint8_t *Stop) { - fuzzer::TPC.HandleInline8bitCountersInit(Start, Stop); -} - -ATTRIBUTE_INTERFACE -void __sanitizer_cov_pcs_init(const uint8_t *pcs_beg, const uint8_t *pcs_end) { - fuzzer::TPC.HandlePCsInit(pcs_beg, pcs_end); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCallerCallee(PC, Callee); -} - 
-ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -// Now the __sanitizer_cov_trace_const_cmp[1248] callbacks just mimic -// the behaviour of __sanitizer_cov_trace_cmp[1248] ones. This, however, -// should be changed later to make full use of instrumentation. -void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE 
-ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) { - uint64_t N = Cases[0]; - uint64_t ValSizeInBits = Cases[1]; - uint64_t *Vals = Cases + 2; - // Skip the most common and the most boring case. - if (Vals[N - 1] < 256 && Val < 256) - return; - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - size_t i; - uint64_t Token = 0; - for (i = 0; i < N; i++) { - Token = Val ^ Vals[i]; - if (Val < Vals[i]) - break; - } - - if (ValSizeInBits == 16) - fuzzer::TPC.HandleCmp(PC + i, static_cast(Token), (uint16_t)(0)); - else if (ValSizeInBits == 32) - fuzzer::TPC.HandleCmp(PC + i, static_cast(Token), (uint32_t)(0)); - else - fuzzer::TPC.HandleCmp(PC + i, Token, (uint64_t)(0)); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_div4(uint32_t Val) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Val, (uint32_t)0); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_div8(uint64_t Val) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Val, (uint64_t)0); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_gep(uintptr_t Idx) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Idx, (uintptr_t)0); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1, - const void *s2, size_t n, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - if (result == 0) return; // No reason to 
mutate. - if (n <= 1) return; // Not interesting. - fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/false); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1, - const char *s2, size_t n, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - if (result == 0) return; // No reason to mutate. - size_t Len1 = fuzzer::InternalStrnlen(s1, n); - size_t Len2 = fuzzer::InternalStrnlen(s2, n); - n = std::min(n, Len1); - n = std::min(n, Len2); - if (n <= 1) return; // Not interesting. - fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/true); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1, - const char *s2, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - if (result == 0) return; // No reason to mutate. - size_t N = fuzzer::InternalStrnlen2(s1, s2); - if (N <= 1) return; // Not interesting. 
- fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, N, /*StopAtZero*/true); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1, - const char *s2, size_t n, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1, - const char *s2, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1, - const char *s2, char *result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - fuzzer::TPC.MMT.Add(reinterpret_cast(s2), strlen(s2)); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1, - const char *s2, char *result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - fuzzer::TPC.MMT.Add(reinterpret_cast(s2), strlen(s2)); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, - const void *s2, size_t len2, void *result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - fuzzer::TPC.MMT.Add(reinterpret_cast(s2), len2); -} -} // extern "C" diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerTracePC.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerTracePC.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerTracePC.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerTracePC.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,257 +0,0 @@ -//===- FuzzerTracePC.h - Internal header for the Fuzzer 
---------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::TracePC -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_TRACE_PC -#define LLVM_FUZZER_TRACE_PC - -#include "FuzzerDefs.h" -#include "FuzzerDictionary.h" -#include "FuzzerValueBitMap.h" - -#include - -namespace fuzzer { - -// TableOfRecentCompares (TORC) remembers the most recently performed -// comparisons of type T. -// We record the arguments of CMP instructions in this table unconditionally -// because it seems cheaper this way than to compute some expensive -// conditions inside __sanitizer_cov_trace_cmp*. -// After the unit has been executed we may decide to use the contents of -// this table to populate a Dictionary. -template -struct TableOfRecentCompares { - static const size_t kSize = kSizeT; - struct Pair { - T A, B; - }; - ATTRIBUTE_NO_SANITIZE_ALL - void Insert(size_t Idx, const T &Arg1, const T &Arg2) { - Idx = Idx % kSize; - Table[Idx].A = Arg1; - Table[Idx].B = Arg2; - } - - Pair Get(size_t I) { return Table[I % kSize]; } - - Pair Table[kSize]; -}; - -template -struct MemMemTable { - static const size_t kSize = kSizeT; - Word MemMemWords[kSize]; - Word EmptyWord; - - void Add(const uint8_t *Data, size_t Size) { - if (Size <= 2) return; - Size = std::min(Size, Word::GetMaxSize()); - size_t Idx = SimpleFastHash(Data, Size) % kSize; - MemMemWords[Idx].Set(Data, Size); - } - const Word &Get(size_t Idx) { - for (size_t i = 0; i < kSize; i++) { - const Word &W = MemMemWords[(Idx + i) % kSize]; - if (W.size()) return W; - } - EmptyWord.Set(nullptr, 0); - return EmptyWord; - } -}; - -class TracePC { - public: - static const size_t kNumPCs = 1 << 21; - // How many bits of PC are used from __sanitizer_cov_trace_pc. 
- static const size_t kTracePcBits = 18; - - void HandleInit(uint32_t *Start, uint32_t *Stop); - void HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop); - void HandlePCsInit(const uint8_t *Start, const uint8_t *Stop); - void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee); - template void HandleCmp(uintptr_t PC, T Arg1, T Arg2); - size_t GetTotalPCCoverage(); - void SetUseCounters(bool UC) { UseCounters = UC; } - void SetUseValueProfile(bool VP) { UseValueProfile = VP; } - void SetPrintNewPCs(bool P) { DoPrintNewPCs = P; } - void UpdateObservedPCs(); - template void CollectFeatures(Callback CB) const; - - void ResetMaps() { - ValueProfileMap.Reset(); - if (NumModules) - memset(Counters(), 0, GetNumPCs()); - ClearExtraCounters(); - ClearInlineCounters(); - ClearClangCounters(); - } - - void ClearInlineCounters(); - - void UpdateFeatureSet(size_t CurrentElementIdx, size_t CurrentElementSize); - void PrintFeatureSet(); - - void PrintModuleInfo(); - - void PrintCoverage(); - void DumpCoverage(); - - void AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2, - size_t n, bool StopAtZero); - - TableOfRecentCompares TORC4; - TableOfRecentCompares TORC8; - TableOfRecentCompares TORCW; - MemMemTable<1024> MMT; - - size_t GetNumPCs() const { - return NumGuards == 0 ? (1 << kTracePcBits) : Min(kNumPCs, NumGuards + 1); - } - uintptr_t GetPC(size_t Idx) { - assert(Idx < GetNumPCs()); - return PCs()[Idx]; - } - - void RecordInitialStack(); - uintptr_t GetMaxStackOffset() const; - - template - void ForEachObservedPC(CallBack CB) { - for (auto PC : ObservedPCs) - CB(PC); - } - -private: - bool UseCounters = false; - bool UseValueProfile = false; - bool DoPrintNewPCs = false; - - struct Module { - uint32_t *Start, *Stop; - }; - - Module Modules[4096]; - size_t NumModules; // linker-initialized. - size_t NumGuards; // linker-initialized. 
- - struct { uint8_t *Start, *Stop; } ModuleCounters[4096]; - size_t NumModulesWithInline8bitCounters; // linker-initialized. - size_t NumInline8bitCounters; - - struct { const uintptr_t *Start, *Stop; } ModulePCTable[4096]; - size_t NumPCTables; - size_t NumPCsInPCTables; - - uint8_t *Counters() const; - uintptr_t *PCs() const; - - std::set ObservedPCs; - - ValueBitMap ValueProfileMap; - uintptr_t InitialStack; -}; - -template -// void Callback(size_t FirstFeature, size_t Idx, uint8_t Value); -ATTRIBUTE_NO_SANITIZE_ALL -void ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End, - size_t FirstFeature, Callback Handle8bitCounter) { - typedef uintptr_t LargeType; - const size_t Step = sizeof(LargeType) / sizeof(uint8_t); - const size_t StepMask = Step - 1; - auto P = Begin; - // Iterate by 1 byte until either the alignment boundary or the end. - for (; reinterpret_cast(P) & StepMask && P < End; P++) - if (uint8_t V = *P) - Handle8bitCounter(FirstFeature, P - Begin, V); - - // Iterate by Step bytes at a time. - for (; P < End; P += Step) - if (LargeType Bundle = *reinterpret_cast(P)) - for (size_t I = 0; I < Step; I++, Bundle >>= 8) - if (uint8_t V = Bundle & 0xff) - Handle8bitCounter(FirstFeature, P - Begin + I, V); - - // Iterate by 1 byte until the end. - for (; P < End; P++) - if (uint8_t V = *P) - Handle8bitCounter(FirstFeature, P - Begin, V); -} - -// Given a non-zero Counters returns a number in [0,7]. 
-template -unsigned CounterToFeature(T Counter) { - assert(Counter); - unsigned Bit = 0; - /**/ if (Counter >= 128) Bit = 7; - else if (Counter >= 32) Bit = 6; - else if (Counter >= 16) Bit = 5; - else if (Counter >= 8) Bit = 4; - else if (Counter >= 4) Bit = 3; - else if (Counter >= 3) Bit = 2; - else if (Counter >= 2) Bit = 1; - return Bit; -} - -template // bool Callback(size_t Feature) -ATTRIBUTE_NO_SANITIZE_ADDRESS -__attribute__((noinline)) -void TracePC::CollectFeatures(Callback HandleFeature) const { - uint8_t *Counters = this->Counters(); - size_t N = GetNumPCs(); - auto Handle8bitCounter = [&](size_t FirstFeature, - size_t Idx, uint8_t Counter) { - HandleFeature(FirstFeature + Idx * 8 + CounterToFeature(Counter)); - }; - - size_t FirstFeature = 0; - - if (!NumInline8bitCounters) { - ForEachNonZeroByte(Counters, Counters + N, FirstFeature, Handle8bitCounter); - FirstFeature += N * 8; - } - - if (NumInline8bitCounters) { - for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { - ForEachNonZeroByte(ModuleCounters[i].Start, ModuleCounters[i].Stop, - FirstFeature, Handle8bitCounter); - FirstFeature += 8 * (ModuleCounters[i].Stop - ModuleCounters[i].Start); - } - } - - if (size_t NumClangCounters = ClangCountersEnd() - ClangCountersBegin()) { - auto P = ClangCountersBegin(); - for (size_t Idx = 0; Idx < NumClangCounters; Idx++) - if (auto Cnt = P[Idx]) - HandleFeature(FirstFeature + Idx * 8 + CounterToFeature(Cnt)); - FirstFeature += NumClangCounters; - } - - ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), FirstFeature, - Handle8bitCounter); - FirstFeature += (ExtraCountersEnd() - ExtraCountersBegin()) * 8; - - if (UseValueProfile) { - ValueProfileMap.ForEach([&](size_t Idx) { - HandleFeature(FirstFeature + Idx); - }); - FirstFeature += ValueProfileMap.SizeInBits(); - } - - if (auto MaxStackOffset = GetMaxStackOffset()) - HandleFeature(FirstFeature + MaxStackOffset); -} - -extern TracePC TPC; - -} // namespace fuzzer - -#endif // 
LLVM_FUZZER_TRACE_PC diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtil.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtil.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtil.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtil.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,215 +0,0 @@ -//===- FuzzerUtil.cpp - Misc utils ----------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils. -//===----------------------------------------------------------------------===// - -#include "FuzzerUtil.h" -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -void PrintHexArray(const uint8_t *Data, size_t Size, - const char *PrintAfter) { - for (size_t i = 0; i < Size; i++) - Printf("0x%x,", (unsigned)Data[i]); - Printf("%s", PrintAfter); -} - -void Print(const Unit &v, const char *PrintAfter) { - PrintHexArray(v.data(), v.size(), PrintAfter); -} - -void PrintASCIIByte(uint8_t Byte) { - if (Byte == '\\') - Printf("\\\\"); - else if (Byte == '"') - Printf("\\\""); - else if (Byte >= 32 && Byte < 127) - Printf("%c", Byte); - else - Printf("\\x%02x", Byte); -} - -void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter) { - for (size_t i = 0; i < Size; i++) - PrintASCIIByte(Data[i]); - Printf("%s", PrintAfter); -} - -void PrintASCII(const Unit &U, const char *PrintAfter) { - PrintASCII(U.data(), U.size(), PrintAfter); -} - -bool ToASCII(uint8_t *Data, size_t Size) { - bool Changed = false; - for (size_t i = 0; i < Size; i++) { - uint8_t &X = Data[i]; - auto NewX = X; - NewX &= 127; - if (!isspace(NewX) && !isprint(NewX)) 
- NewX = ' '; - Changed |= NewX != X; - X = NewX; - } - return Changed; -} - -bool IsASCII(const Unit &U) { return IsASCII(U.data(), U.size()); } - -bool IsASCII(const uint8_t *Data, size_t Size) { - for (size_t i = 0; i < Size; i++) - if (!(isprint(Data[i]) || isspace(Data[i]))) return false; - return true; -} - -bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) { - U->clear(); - if (Str.empty()) return false; - size_t L = 0, R = Str.size() - 1; // We are parsing the range [L,R]. - // Skip spaces from both sides. - while (L < R && isspace(Str[L])) L++; - while (R > L && isspace(Str[R])) R--; - if (R - L < 2) return false; - // Check the closing " - if (Str[R] != '"') return false; - R--; - // Find the opening " - while (L < R && Str[L] != '"') L++; - if (L >= R) return false; - assert(Str[L] == '\"'); - L++; - assert(L <= R); - for (size_t Pos = L; Pos <= R; Pos++) { - uint8_t V = (uint8_t)Str[Pos]; - if (!isprint(V) && !isspace(V)) return false; - if (V =='\\') { - // Handle '\\' - if (Pos + 1 <= R && (Str[Pos + 1] == '\\' || Str[Pos + 1] == '"')) { - U->push_back(Str[Pos + 1]); - Pos++; - continue; - } - // Handle '\xAB' - if (Pos + 3 <= R && Str[Pos + 1] == 'x' - && isxdigit(Str[Pos + 2]) && isxdigit(Str[Pos + 3])) { - char Hex[] = "0xAA"; - Hex[2] = Str[Pos + 2]; - Hex[3] = Str[Pos + 3]; - U->push_back(strtol(Hex, nullptr, 16)); - Pos += 3; - continue; - } - return false; // Invalid escape. - } else { - // Any other character. - U->push_back(V); - } - } - return true; -} - -bool ParseDictionaryFile(const std::string &Text, std::vector *Units) { - if (Text.empty()) { - Printf("ParseDictionaryFile: file does not exist or is empty\n"); - return false; - } - std::istringstream ISS(Text); - Units->clear(); - Unit U; - int LineNo = 0; - std::string S; - while (std::getline(ISS, S, '\n')) { - LineNo++; - size_t Pos = 0; - while (Pos < S.size() && isspace(S[Pos])) Pos++; // Skip spaces. - if (Pos == S.size()) continue; // Empty line. 
- if (S[Pos] == '#') continue; // Comment line. - if (ParseOneDictionaryEntry(S, &U)) { - Units->push_back(U); - } else { - Printf("ParseDictionaryFile: error in line %d\n\t\t%s\n", LineNo, - S.c_str()); - return false; - } - } - return true; -} - -std::string Base64(const Unit &U) { - static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - std::string Res; - size_t i; - for (i = 0; i + 2 < U.size(); i += 3) { - uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2]; - Res += Table[(x >> 18) & 63]; - Res += Table[(x >> 12) & 63]; - Res += Table[(x >> 6) & 63]; - Res += Table[x & 63]; - } - if (i + 1 == U.size()) { - uint32_t x = (U[i] << 16); - Res += Table[(x >> 18) & 63]; - Res += Table[(x >> 12) & 63]; - Res += "=="; - } else if (i + 2 == U.size()) { - uint32_t x = (U[i] << 16) + (U[i + 1] << 8); - Res += Table[(x >> 18) & 63]; - Res += Table[(x >> 12) & 63]; - Res += Table[(x >> 6) & 63]; - Res += "="; - } - return Res; -} - -std::string DescribePC(const char *SymbolizedFMT, uintptr_t PC) { - if (!EF->__sanitizer_symbolize_pc) return ""; - char PcDescr[1024]; - EF->__sanitizer_symbolize_pc(reinterpret_cast(PC), - SymbolizedFMT, PcDescr, sizeof(PcDescr)); - PcDescr[sizeof(PcDescr) - 1] = 0; // Just in case. - return PcDescr; -} - -void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC) { - if (EF->__sanitizer_symbolize_pc) - Printf("%s", DescribePC(SymbolizedFMT, PC).c_str()); - else - Printf(FallbackFMT, PC); -} - -unsigned NumberOfCpuCores() { - unsigned N = std::thread::hardware_concurrency(); - if (!N) { - Printf("WARNING: std::thread::hardware_concurrency not well defined for " - "your platform. 
Assuming CPU count of 1.\n"); - N = 1; - } - return N; -} - -size_t SimpleFastHash(const uint8_t *Data, size_t Size) { - size_t Res = 0; - for (size_t i = 0; i < Size; i++) - Res = Res * 11 + Data[i]; - return Res; -} - -} // namespace fuzzer diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtilDarwin.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtilDarwin.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtilDarwin.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtilDarwin.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,161 +0,0 @@ -//===- FuzzerUtilDarwin.cpp - Misc utils ----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils for Darwin. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_APPLE - -#include "FuzzerIO.h" -#include -#include -#include -#include -#include -#include - -// There is no header for this on macOS so declare here -extern "C" char **environ; - -namespace fuzzer { - -static std::mutex SignalMutex; -// Global variables used to keep track of how signal handling should be -// restored. They should **not** be accessed without holding `SignalMutex`. -static int ActiveThreadCount = 0; -static struct sigaction OldSigIntAction; -static struct sigaction OldSigQuitAction; -static sigset_t OldBlockedSignalsSet; - -// This is a reimplementation of Libc's `system()`. On Darwin the Libc -// implementation contains a mutex which prevents it from being used -// concurrently. This implementation **can** be used concurrently. 
It sets the -// signal handlers when the first thread enters and restores them when the last -// thread finishes execution of the function and ensures this is not racey by -// using a mutex. -int ExecuteCommand(const std::string &Command) { - posix_spawnattr_t SpawnAttributes; - if (posix_spawnattr_init(&SpawnAttributes)) - return -1; - // Block and ignore signals of the current process when the first thread - // enters. - { - std::lock_guard Lock(SignalMutex); - if (ActiveThreadCount == 0) { - static struct sigaction IgnoreSignalAction; - sigset_t BlockedSignalsSet; - memset(&IgnoreSignalAction, 0, sizeof(IgnoreSignalAction)); - IgnoreSignalAction.sa_handler = SIG_IGN; - - if (sigaction(SIGINT, &IgnoreSignalAction, &OldSigIntAction) == -1) { - Printf("Failed to ignore SIGINT\n"); - (void)posix_spawnattr_destroy(&SpawnAttributes); - return -1; - } - if (sigaction(SIGQUIT, &IgnoreSignalAction, &OldSigQuitAction) == -1) { - Printf("Failed to ignore SIGQUIT\n"); - // Try our best to restore the signal handlers. - (void)sigaction(SIGINT, &OldSigIntAction, NULL); - (void)posix_spawnattr_destroy(&SpawnAttributes); - return -1; - } - - (void)sigemptyset(&BlockedSignalsSet); - (void)sigaddset(&BlockedSignalsSet, SIGCHLD); - if (sigprocmask(SIG_BLOCK, &BlockedSignalsSet, &OldBlockedSignalsSet) == - -1) { - Printf("Failed to block SIGCHLD\n"); - // Try our best to restore the signal handlers. - (void)sigaction(SIGQUIT, &OldSigQuitAction, NULL); - (void)sigaction(SIGINT, &OldSigIntAction, NULL); - (void)posix_spawnattr_destroy(&SpawnAttributes); - return -1; - } - } - ++ActiveThreadCount; - } - - // NOTE: Do not introduce any new `return` statements past this - // point. It is important that `ActiveThreadCount` always be decremented - // when leaving this function. - - // Make sure the child process uses the default handlers for the - // following signals rather than inheriting what the parent has. 
- sigset_t DefaultSigSet; - (void)sigemptyset(&DefaultSigSet); - (void)sigaddset(&DefaultSigSet, SIGQUIT); - (void)sigaddset(&DefaultSigSet, SIGINT); - (void)posix_spawnattr_setsigdefault(&SpawnAttributes, &DefaultSigSet); - // Make sure the child process doesn't block SIGCHLD - (void)posix_spawnattr_setsigmask(&SpawnAttributes, &OldBlockedSignalsSet); - short SpawnFlags = POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK; - (void)posix_spawnattr_setflags(&SpawnAttributes, SpawnFlags); - - pid_t Pid; - char **Environ = environ; // Read from global - const char *CommandCStr = Command.c_str(); - char *const Argv[] = { - strdup("sh"), - strdup("-c"), - strdup(CommandCStr), - NULL - }; - int ErrorCode = 0, ProcessStatus = 0; - // FIXME: We probably shouldn't hardcode the shell path. - ErrorCode = posix_spawn(&Pid, "/bin/sh", NULL, &SpawnAttributes, - Argv, Environ); - (void)posix_spawnattr_destroy(&SpawnAttributes); - if (!ErrorCode) { - pid_t SavedPid = Pid; - do { - // Repeat until call completes uninterrupted. - Pid = waitpid(SavedPid, &ProcessStatus, /*options=*/0); - } while (Pid == -1 && errno == EINTR); - if (Pid == -1) { - // Fail for some other reason. - ProcessStatus = -1; - } - } else if (ErrorCode == ENOMEM || ErrorCode == EAGAIN) { - // Fork failure. - ProcessStatus = -1; - } else { - // Shell execution failure. - ProcessStatus = W_EXITCODE(127, 0); - } - for (unsigned i = 0, n = sizeof(Argv) / sizeof(Argv[0]); i < n; ++i) - free(Argv[i]); - - // Restore the signal handlers of the current process when the last thread - // using this function finishes. 
- { - std::lock_guard Lock(SignalMutex); - --ActiveThreadCount; - if (ActiveThreadCount == 0) { - bool FailedRestore = false; - if (sigaction(SIGINT, &OldSigIntAction, NULL) == -1) { - Printf("Failed to restore SIGINT handling\n"); - FailedRestore = true; - } - if (sigaction(SIGQUIT, &OldSigQuitAction, NULL) == -1) { - Printf("Failed to restore SIGQUIT handling\n"); - FailedRestore = true; - } - if (sigprocmask(SIG_BLOCK, &OldBlockedSignalsSet, NULL) == -1) { - Printf("Failed to unblock SIGCHLD\n"); - FailedRestore = true; - } - if (FailedRestore) - ProcessStatus = -1; - } - } - return ProcessStatus; -} - -} // namespace fuzzer - -#endif // LIBFUZZER_APPLE diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtil.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtil.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtil.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtil.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,84 +0,0 @@ -//===- FuzzerUtil.h - Internal header for the Fuzzer Utils ------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Util functions. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_UTIL_H -#define LLVM_FUZZER_UTIL_H - -#include "FuzzerDefs.h" - -namespace fuzzer { - -void PrintHexArray(const Unit &U, const char *PrintAfter = ""); - -void PrintHexArray(const uint8_t *Data, size_t Size, - const char *PrintAfter = ""); - -void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); - -void PrintASCII(const Unit &U, const char *PrintAfter = ""); - -// Changes U to contain only ASCII (isprint+isspace) characters. -// Returns true iff U has been changed. 
-bool ToASCII(uint8_t *Data, size_t Size); - -bool IsASCII(const Unit &U); - -bool IsASCII(const uint8_t *Data, size_t Size); - -std::string Base64(const Unit &U); - -void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC); - -std::string DescribePC(const char *SymbolizedFMT, uintptr_t PC); - -unsigned NumberOfCpuCores(); - -// Platform specific functions. -void SetSignalHandler(const FuzzingOptions& Options); - -void SleepSeconds(int Seconds); - -unsigned long GetPid(); - -size_t GetPeakRSSMb(); - -int ExecuteCommand(const std::string &Command); - -FILE *OpenProcessPipe(const char *Command, const char *Mode); - -const void *SearchMemory(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen); - -std::string CloneArgsWithoutX(const std::vector &Args, - const char *X1, const char *X2); - -inline std::string CloneArgsWithoutX(const std::vector &Args, - const char *X) { - return CloneArgsWithoutX(Args, X, X); -} - -inline std::pair SplitBefore(std::string X, - std::string S) { - auto Pos = S.find(X); - if (Pos == std::string::npos) - return std::make_pair(S, ""); - return std::make_pair(S.substr(0, Pos), S.substr(Pos)); -} - -std::string DisassembleCmd(const std::string &FileName); - -std::string SearchRegexCmd(const std::string &Regex); - -size_t SimpleFastHash(const uint8_t *Data, size_t Size); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_UTIL_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtilLinux.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtilLinux.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtilLinux.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtilLinux.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -//===- FuzzerUtilLinux.cpp - Misc utils for Linux. 
------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils for Linux. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_LINUX - -#include - -namespace fuzzer { - -int ExecuteCommand(const std::string &Command) { - return system(Command.c_str()); -} - -} // namespace fuzzer - -#endif // LIBFUZZER_LINUX diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtilPosix.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtilPosix.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtilPosix.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtilPosix.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,144 +0,0 @@ -//===- FuzzerUtilPosix.cpp - Misc utils for Posix. ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils implementation using Posix API. 
-//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_POSIX -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -static void AlarmHandler(int, siginfo_t *, void *) { - Fuzzer::StaticAlarmCallback(); -} - -static void CrashHandler(int, siginfo_t *, void *) { - Fuzzer::StaticCrashSignalCallback(); -} - -static void InterruptHandler(int, siginfo_t *, void *) { - Fuzzer::StaticInterruptCallback(); -} - -static void FileSizeExceedHandler(int, siginfo_t *, void *) { - Fuzzer::StaticFileSizeExceedCallback(); -} - -static void SetSigaction(int signum, - void (*callback)(int, siginfo_t *, void *)) { - struct sigaction sigact = {}; - if (sigaction(signum, nullptr, &sigact)) { - Printf("libFuzzer: sigaction failed with %d\n", errno); - exit(1); - } - if (sigact.sa_flags & SA_SIGINFO) { - if (sigact.sa_sigaction) - return; - } else { - if (sigact.sa_handler != SIG_DFL && sigact.sa_handler != SIG_IGN && - sigact.sa_handler != SIG_ERR) - return; - } - - sigact = {}; - sigact.sa_sigaction = callback; - if (sigaction(signum, &sigact, 0)) { - Printf("libFuzzer: sigaction failed with %d\n", errno); - exit(1); - } -} - -void SetTimer(int Seconds) { - struct itimerval T { - {Seconds, 0}, { Seconds, 0 } - }; - if (setitimer(ITIMER_REAL, &T, nullptr)) { - Printf("libFuzzer: setitimer failed with %d\n", errno); - exit(1); - } - SetSigaction(SIGALRM, AlarmHandler); -} - -void SetSignalHandler(const FuzzingOptions& Options) { - if (Options.UnitTimeoutSec > 0) - SetTimer(Options.UnitTimeoutSec / 2 + 1); - if (Options.HandleInt) - SetSigaction(SIGINT, InterruptHandler); - if (Options.HandleTerm) - SetSigaction(SIGTERM, InterruptHandler); - if (Options.HandleSegv) - SetSigaction(SIGSEGV, CrashHandler); - if (Options.HandleBus) - SetSigaction(SIGBUS, CrashHandler); 
- if (Options.HandleAbrt) - SetSigaction(SIGABRT, CrashHandler); - if (Options.HandleIll) - SetSigaction(SIGILL, CrashHandler); - if (Options.HandleFpe) - SetSigaction(SIGFPE, CrashHandler); - if (Options.HandleXfsz) - SetSigaction(SIGXFSZ, FileSizeExceedHandler); -} - -void SleepSeconds(int Seconds) { - sleep(Seconds); // Use C API to avoid coverage from instrumented libc++. -} - -unsigned long GetPid() { return (unsigned long)getpid(); } - -size_t GetPeakRSSMb() { - struct rusage usage; - if (getrusage(RUSAGE_SELF, &usage)) - return 0; - if (LIBFUZZER_LINUX) { - // ru_maxrss is in KiB - return usage.ru_maxrss >> 10; - } else if (LIBFUZZER_APPLE) { - // ru_maxrss is in bytes - return usage.ru_maxrss >> 20; - } - assert(0 && "GetPeakRSSMb() is not implemented for your platform"); - return 0; -} - -FILE *OpenProcessPipe(const char *Command, const char *Mode) { - return popen(Command, Mode); -} - -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - return memmem(Data, DataLen, Patt, PattLen); -} - -std::string DisassembleCmd(const std::string &FileName) { - return "objdump -d " + FileName; -} - -std::string SearchRegexCmd(const std::string &Regex) { - return "grep '" + Regex + "'"; -} - -} // namespace fuzzer - -#endif // LIBFUZZER_POSIX diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtilWindows.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtilWindows.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerUtilWindows.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerUtilWindows.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,193 +0,0 @@ -//===- FuzzerUtilWindows.cpp - Misc utils for Windows. --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// Misc utils implementation for Windows. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// This must be included after windows.h. -#include - -namespace fuzzer { - -static const FuzzingOptions* HandlerOpt = nullptr; - -static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) { - switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { - case EXCEPTION_ACCESS_VIOLATION: - case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: - case EXCEPTION_STACK_OVERFLOW: - if (HandlerOpt->HandleSegv) - Fuzzer::StaticCrashSignalCallback(); - break; - case EXCEPTION_DATATYPE_MISALIGNMENT: - case EXCEPTION_IN_PAGE_ERROR: - if (HandlerOpt->HandleBus) - Fuzzer::StaticCrashSignalCallback(); - break; - case EXCEPTION_ILLEGAL_INSTRUCTION: - case EXCEPTION_PRIV_INSTRUCTION: - if (HandlerOpt->HandleIll) - Fuzzer::StaticCrashSignalCallback(); - break; - case EXCEPTION_FLT_DENORMAL_OPERAND: - case EXCEPTION_FLT_DIVIDE_BY_ZERO: - case EXCEPTION_FLT_INEXACT_RESULT: - case EXCEPTION_FLT_INVALID_OPERATION: - case EXCEPTION_FLT_OVERFLOW: - case EXCEPTION_FLT_STACK_CHECK: - case EXCEPTION_FLT_UNDERFLOW: - case EXCEPTION_INT_DIVIDE_BY_ZERO: - case EXCEPTION_INT_OVERFLOW: - if (HandlerOpt->HandleFpe) - Fuzzer::StaticCrashSignalCallback(); - break; - // TODO: handle (Options.HandleXfsz) - } - return EXCEPTION_CONTINUE_SEARCH; -} - -BOOL WINAPI CtrlHandler(DWORD dwCtrlType) { - switch (dwCtrlType) { - case CTRL_C_EVENT: - if (HandlerOpt->HandleInt) - Fuzzer::StaticInterruptCallback(); - return TRUE; - case CTRL_BREAK_EVENT: - if (HandlerOpt->HandleTerm) - Fuzzer::StaticInterruptCallback(); - return TRUE; - } - return FALSE; -} - -void CALLBACK AlarmHandler(PVOID, BOOLEAN) { - 
Fuzzer::StaticAlarmCallback(); -} - -class TimerQ { - HANDLE TimerQueue; - public: - TimerQ() : TimerQueue(NULL) {}; - ~TimerQ() { - if (TimerQueue) - DeleteTimerQueueEx(TimerQueue, NULL); - }; - void SetTimer(int Seconds) { - if (!TimerQueue) { - TimerQueue = CreateTimerQueue(); - if (!TimerQueue) { - Printf("libFuzzer: CreateTimerQueue failed.\n"); - exit(1); - } - } - HANDLE Timer; - if (!CreateTimerQueueTimer(&Timer, TimerQueue, AlarmHandler, NULL, - Seconds*1000, Seconds*1000, 0)) { - Printf("libFuzzer: CreateTimerQueueTimer failed.\n"); - exit(1); - } - }; -}; - -static TimerQ Timer; - -static void CrashHandler(int) { Fuzzer::StaticCrashSignalCallback(); } - -void SetSignalHandler(const FuzzingOptions& Options) { - HandlerOpt = &Options; - - if (Options.UnitTimeoutSec > 0) - Timer.SetTimer(Options.UnitTimeoutSec / 2 + 1); - - if (Options.HandleInt || Options.HandleTerm) - if (!SetConsoleCtrlHandler(CtrlHandler, TRUE)) { - DWORD LastError = GetLastError(); - Printf("libFuzzer: SetConsoleCtrlHandler failed (Error code: %lu).\n", - LastError); - exit(1); - } - - if (Options.HandleSegv || Options.HandleBus || Options.HandleIll || - Options.HandleFpe) - SetUnhandledExceptionFilter(ExceptionHandler); - - if (Options.HandleAbrt) - if (SIG_ERR == signal(SIGABRT, CrashHandler)) { - Printf("libFuzzer: signal failed with %d\n", errno); - exit(1); - } -} - -void SleepSeconds(int Seconds) { Sleep(Seconds * 1000); } - -unsigned long GetPid() { return GetCurrentProcessId(); } - -size_t GetPeakRSSMb() { - PROCESS_MEMORY_COUNTERS info; - if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) - return 0; - return info.PeakWorkingSetSize >> 20; -} - -FILE *OpenProcessPipe(const char *Command, const char *Mode) { - return _popen(Command, Mode); -} - -int ExecuteCommand(const std::string &Command) { - return system(Command.c_str()); -} - -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - // TODO: make this 
implementation more efficient. - const char *Cdata = (const char *)Data; - const char *Cpatt = (const char *)Patt; - - if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen) - return NULL; - - if (PattLen == 1) - return memchr(Data, *Cpatt, DataLen); - - const char *End = Cdata + DataLen - PattLen + 1; - - for (const char *It = Cdata; It < End; ++It) - if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0) - return It; - - return NULL; -} - -std::string DisassembleCmd(const std::string &FileName) { - if (ExecuteCommand("dumpbin /summary > nul") == 0) - return "dumpbin /disasm " + FileName; - Printf("libFuzzer: couldn't find tool to disassemble (dumpbin)\n"); - exit(1); -} - -std::string SearchRegexCmd(const std::string &Regex) { - return "findstr /r \"" + Regex + "\""; -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerValueBitMap.h llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerValueBitMap.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/FuzzerValueBitMap.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/FuzzerValueBitMap.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -//===- FuzzerValueBitMap.h - INTERNAL - Bit map -----------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// ValueBitMap. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_VALUE_BIT_MAP_H -#define LLVM_FUZZER_VALUE_BIT_MAP_H - -#include "FuzzerDefs.h" - -namespace fuzzer { - -// A bit map containing kMapSizeInWords bits. 
-struct ValueBitMap { - static const size_t kMapSizeInBits = 1 << 16; - static const size_t kMapPrimeMod = 65371; // Largest Prime < kMapSizeInBits; - static const size_t kBitsInWord = (sizeof(uintptr_t) * 8); - static const size_t kMapSizeInWords = kMapSizeInBits / kBitsInWord; - public: - - // Clears all bits. - void Reset() { memset(Map, 0, sizeof(Map)); } - - // Computes a hash function of Value and sets the corresponding bit. - // Returns true if the bit was changed from 0 to 1. - ATTRIBUTE_NO_SANITIZE_ALL - inline bool AddValue(uintptr_t Value) { - uintptr_t Idx = Value % kMapSizeInBits; - uintptr_t WordIdx = Idx / kBitsInWord; - uintptr_t BitIdx = Idx % kBitsInWord; - uintptr_t Old = Map[WordIdx]; - uintptr_t New = Old | (1UL << BitIdx); - Map[WordIdx] = New; - return New != Old; - } - - ATTRIBUTE_NO_SANITIZE_ALL - inline bool AddValueModPrime(uintptr_t Value) { - return AddValue(Value % kMapPrimeMod); - } - - inline bool Get(uintptr_t Idx) { - assert(Idx < kMapSizeInBits); - uintptr_t WordIdx = Idx / kBitsInWord; - uintptr_t BitIdx = Idx % kBitsInWord; - return Map[WordIdx] & (1UL << BitIdx); - } - - size_t SizeInBits() const { return kMapSizeInBits; } - - template - ATTRIBUTE_NO_SANITIZE_ALL - void ForEach(Callback CB) const { - for (size_t i = 0; i < kMapSizeInWords; i++) - if (uintptr_t M = Map[i]) - for (size_t j = 0; j < sizeof(M) * 8; j++) - if (M & ((uintptr_t)1 << j)) - CB(i * sizeof(M) * 8 + j); - } - - private: - uintptr_t Map[kMapSizeInWords] __attribute__((aligned(512))); -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_VALUE_BIT_MAP_H diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/README.txt llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/README.txt --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/README.txt 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/README.txt 2017-10-17 14:41:12.000000000 +0000 @@ -1,5 +1 @@ libFuzzer was moved to compiler-rt in 
https://reviews.llvm.org/D36908. -All future changes should be directed there. - -The copy of sources is temporarily left in this folder for the duration of a -move. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/standalone/StandaloneFuzzTargetMain.c llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/standalone/StandaloneFuzzTargetMain.c --- llvm-toolchain-snapshot-6.0~svn315865/lib/Fuzzer/standalone/StandaloneFuzzTargetMain.c 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Fuzzer/standalone/StandaloneFuzzTargetMain.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,41 +0,0 @@ -/*===- StandaloneFuzzTargetMain.c - standalone main() for fuzz targets. ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This main() function can be linked to a fuzz target (i.e. a library -// that exports LLVMFuzzerTestOneInput() and possibly LLVMFuzzerInitialize()) -// instead of libFuzzer. This main() function will not perform any fuzzing -// but will simply feed all input files one by one to the fuzz target. -// -// Use this file to provide reproducers for bugs when linking against libFuzzer -// or other fuzzing engine is undesirable. 
-//===----------------------------------------------------------------------===*/ -#include -#include -#include - -extern int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size); -__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); -int main(int argc, char **argv) { - fprintf(stderr, "StandaloneFuzzTargetMain: running %d inputs\n", argc - 1); - if (LLVMFuzzerInitialize) - LLVMFuzzerInitialize(&argc, &argv); - for (int i = 1; i < argc; i++) { - fprintf(stderr, "Running: %s\n", argv[i]); - FILE *f = fopen(argv[i], "r"); - assert(f); - fseek(f, 0, SEEK_END); - size_t len = ftell(f); - fseek(f, 0, SEEK_SET); - unsigned char *buf = (unsigned char*)malloc(len); - size_t n_read = fread(buf, 1, len, f); - assert(n_read == len); - LLVMFuzzerTestOneInput(buf, len); - free(buf); - fprintf(stderr, "Done: %s: (%zd bytes)\n", argv[i], n_read); - } -} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/FuzzMutate/FuzzerCLI.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/FuzzMutate/FuzzerCLI.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/FuzzMutate/FuzzerCLI.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/FuzzMutate/FuzzerCLI.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -47,7 +47,7 @@ Args.push_back("-O0"); } else if (Opt.startswith("O")) { Args.push_back("-" + Opt.str()); - } else if (Triple::getArchTypeForLLVMName(Opt)) { + } else if (Triple(Opt).getArch()) { Args.push_back("-mtriple=" + Opt.str()); } else { errs() << ExecName << ": Unknown option: " << Opt << ".\n"; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/IR/LLVMContext.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/IR/LLVMContext.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/IR/LLVMContext.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/IR/LLVMContext.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -59,6 +59,7 @@ {MD_section_prefix, "section_prefix"}, {MD_absolute_symbol, "absolute_symbol"}, 
{MD_associated, "associated"}, + {MD_callees, "callees"}, }; for (auto &MDKind : MDKinds) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/IR/MDBuilder.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/IR/MDBuilder.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/IR/MDBuilder.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/IR/MDBuilder.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -14,6 +14,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" using namespace llvm; @@ -95,6 +96,13 @@ return MDNode::get(Context, {createConstant(Lo), createConstant(Hi)}); } +MDNode *MDBuilder::createCallees(ArrayRef Callees) { + SmallVector Ops; + for (Function *F : Callees) + Ops.push_back(createConstant(F)); + return MDNode::get(Context, Ops); +} + MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) { // To ensure uniqueness the root node is self-referential. auto Dummy = MDNode::getTemporary(Context, None); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/MC/MCParser/AsmLexer.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/MC/MCParser/AsmLexer.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/MC/MCParser/AsmLexer.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/MC/MCParser/AsmLexer.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -606,8 +606,16 @@ return LexToken(); // Ignore whitespace. else return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); + case '\r': { + IsAtStartOfLine = true; + IsAtStartOfStatement = true; + // If this is a CR followed by LF, treat that as one token. 
+ if (CurPtr != CurBuf.end() && *CurPtr == '\n') + ++CurPtr; + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, CurPtr - TokStart)); + } case '\n': - case '\r': IsAtStartOfLine = true; IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AArch64/AArch64InstrFormats.td llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AArch64/AArch64InstrFormats.td --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AArch64/AArch64InstrFormats.td 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AArch64/AArch64InstrFormats.td 2017-10-17 14:41:12.000000000 +0000 @@ -2516,6 +2516,22 @@ def am_indexed64 : ComplexPattern; def am_indexed128 : ComplexPattern; +def gi_am_indexed8 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed16 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed32 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed64 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed128 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; + class UImm12OffsetOperand : AsmOperandClass { let Name = "UImm12Offset" # Scale; let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">"; @@ -3146,6 +3162,23 @@ def am_unscaled64 : ComplexPattern; def am_unscaled128 :ComplexPattern; +def gi_am_unscaled8 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled16 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled128 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, string asm, list pattern> : I { diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AArch64/AArch64InstructionSelector.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AArch64/AArch64InstructionSelector.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AArch64/AArch64InstructionSelector.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AArch64/AArch64InstructionSelector.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -66,6 +66,32 @@ ComplexRendererFn selectArithImmed(MachineOperand &Root) const; + ComplexRendererFn selectAddrModeUnscaled(MachineOperand &Root, + unsigned Size) const; + + ComplexRendererFn selectAddrModeUnscaled8(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 1); + } + ComplexRendererFn selectAddrModeUnscaled16(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 2); + } + ComplexRendererFn selectAddrModeUnscaled32(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 4); + } + ComplexRendererFn selectAddrModeUnscaled64(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 8); + } + ComplexRendererFn selectAddrModeUnscaled128(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 16); + } + + ComplexRendererFn selectAddrModeIndexed(MachineOperand &Root, + unsigned Size) const; + template + ComplexRendererFn selectAddrModeIndexed(MachineOperand &Root) const { + return selectAddrModeIndexed(Root, Width / 8); + } + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; @@ -1392,6 +1418,109 @@ }}; } +/// Select a "register plus unscaled signed 9-bit immediate" address. This +/// should only match when there is an offset that is not valid for a scaled +/// immediate addressing mode. The "Size" argument is the size in bytes of the +/// memory reference, which is needed here to know what is valid for a scaled +/// immediate. 
+InstructionSelector::ComplexRendererFn +AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, + unsigned Size) const { + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + if (!Root.isReg()) + return None; + + if (!isBaseWithConstantOffset(Root, MRI)) + return None; + + MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (!RootDef) + return None; + + MachineOperand &OffImm = RootDef->getOperand(2); + if (!OffImm.isReg()) + return None; + MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); + if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) + return None; + int64_t RHSC; + MachineOperand &RHSOp1 = RHS->getOperand(1); + if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) + return None; + RHSC = RHSOp1.getCImm()->getSExtValue(); + + // If the offset is valid as a scaled immediate, don't match here. + if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) + return None; + if (RHSC >= -256 && RHSC < 256) { + MachineOperand &Base = RootDef->getOperand(1); + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, + }}; + } + return None; +} + +/// Select a "register plus scaled unsigned 12-bit immediate" address. The +/// "Size" argument is the size in bytes of the memory reference, which +/// determines the scale. 
+InstructionSelector::ComplexRendererFn +AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, + unsigned Size) const { + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + if (!Root.isReg()) + return None; + + MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (!RootDef) + return None; + + if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + }}; + } + + if (isBaseWithConstantOffset(Root, MRI)) { + MachineOperand &LHS = RootDef->getOperand(1); + MachineOperand &RHS = RootDef->getOperand(2); + MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); + MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + if (LHSDef && RHSDef) { + int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); + unsigned Scale = Log2_32(Size); + if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { + if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, + }}; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, + }}; + } + } + } + + // Before falling back to our general case, check if the unscaled + // instructions can handle this. If so, that's preferable. 
+ if (selectAddrModeUnscaled(Root, Size).hasValue()) + return None; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + }}; +} + namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AArch64/AArch64LegalizerInfo.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AArch64/AArch64LegalizerInfo.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AArch64/AArch64LegalizerInfo.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AArch64/AArch64LegalizerInfo.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -31,6 +31,7 @@ const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + const LLT s128 = LLT::scalar(128); const LLT v2s32 = LLT::vector(2, 32); const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); @@ -229,7 +230,8 @@ setAction({G_INTTOPTR, 1, s64}, Legal); // Casts for 32 and 64-bit width type are just copies. - for (auto Ty : {s1, s8, s16, s32, s64}) { + // Same for 128-bit width type, except they are on the FPR bank. 
+ for (auto Ty : {s1, s8, s16, s32, s64, s128}) { setAction({G_BITCAST, 0, Ty}, Legal); setAction({G_BITCAST, 1, Ty}, Legal); } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AArch64/AArch64RegisterBankInfo.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AArch64/AArch64RegisterBankInfo.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AArch64/AArch64RegisterBankInfo.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AArch64/AArch64RegisterBankInfo.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -415,12 +415,10 @@ const RegisterBankInfo::InstructionMapping & AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const unsigned Opc = MI.getOpcode(); - const MachineFunction &MF = *MI.getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. - if (!isPreISelGenericOpcode(Opc) || + if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) || Opc == TargetOpcode::G_PHI) { const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); @@ -428,6 +426,11 @@ return Mapping; } + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + switch (Opc) { // G_{F|S|U}REM are not listed because they are not legal. // Arithmetic ops. @@ -451,12 +454,39 @@ case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: return getSameKindOfOperandsMapping(MI); + case TargetOpcode::COPY: { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + // Check if one of the register is not a generic register. 
+ if ((TargetRegisterInfo::isPhysicalRegister(DstReg) || + !MRI.getType(DstReg).isValid()) || + (TargetRegisterInfo::isPhysicalRegister(SrcReg) || + !MRI.getType(SrcReg).isValid())) { + const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI); + const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI); + if (!DstRB) + DstRB = SrcRB; + else if (!SrcRB) + SrcRB = DstRB; + // If both RB are null that means both registers are generic. + // We shouldn't be here. + assert(DstRB && SrcRB && "Both RegBank were nullptr"); + unsigned Size = getSizeInBits(DstReg, MRI, TRI); + return getInstructionMapping( + DefaultMappingID, copyCost(*DstRB, *SrcRB, Size), + getCopyMapping(DstRB->getID(), SrcRB->getID(), Size), + // We only care about the mapping of the destination. + /*NumOperands*/ 1); + } + // Both registers are generic, use G_BITCAST. + LLVM_FALLTHROUGH; + } case TargetOpcode::G_BITCAST: { LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); unsigned Size = DstTy.getSizeInBits(); - bool DstIsGPR = !DstTy.isVector(); - bool SrcIsGPR = !SrcTy.isVector(); + bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64; + bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64; const RegisterBank &DstRB = DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; const RegisterBank &SrcRB = @@ -464,7 +494,8 @@ return getInstructionMapping( DefaultMappingID, copyCost(DstRB, SrcRB, Size), getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), - /*NumOperands*/ 2); + // We only care about the mapping of the destination for COPY. + /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 
2 : 1); } default: break; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AMDGPU/R600InstrInfo.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AMDGPU/R600InstrInfo.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AMDGPU/R600InstrInfo.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AMDGPU/R600InstrInfo.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -1186,10 +1186,8 @@ } const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass(); - for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - LE = MRI.livein_end(); - LI != LE; ++LI) { - unsigned Reg = LI->first; + for (std::pair LI : MRI.liveins()) { + unsigned Reg = LI.first; if (TargetRegisterInfo::isVirtualRegister(Reg) || !IndirectRC->contains(Reg)) continue; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AMDGPU/SIISelLowering.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AMDGPU/SIISelLowering.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/AMDGPU/SIISelLowering.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/AMDGPU/SIISelLowering.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -6507,8 +6507,7 @@ Node->getOperand(i)), 0)); } - DAG.UpdateNodeOperands(Node, Ops); - return Node; + return DAG.UpdateNodeOperands(Node, Ops); } /// \brief Fold the instructions after selecting them. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/BPF/BPFInstrInfo.td llvm-toolchain-snapshot-6.0~svn316003/lib/Target/BPF/BPFInstrInfo.td --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/BPF/BPFInstrInfo.td 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/BPF/BPFInstrInfo.td 2017-10-17 14:41:12.000000000 +0000 @@ -460,7 +460,7 @@ (ins GPR:$lhs, i64imm:$rhs, i64imm:$imm, GPR:$src, GPR:$src2), "# Select PSEUDO $dst = $lhs $imm $rhs ? 
$src : $src2", [(set i64:$dst, - (BPFselectcc i64:$lhs, (i64 imm:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; + (BPFselectcc i64:$lhs, (i64immSExt32:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; } // load 64-bit global addr into register diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/BPF/BPFISelLowering.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/BPF/BPFISelLowering.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/BPF/BPFISelLowering.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/BPF/BPFISelLowering.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -611,11 +611,15 @@ .addReg(LHS) .addReg(MI.getOperand(2).getReg()) .addMBB(Copy1MBB); - else + else { + int64_t imm32 = MI.getOperand(2).getImm(); + // sanity check before we build J*_ri instruction. + assert (isInt<32>(imm32)); BuildMI(BB, DL, TII.get(NewCC)) .addReg(LHS) - .addImm(MI.getOperand(2).getImm()) + .addImm(imm32) .addMBB(Copy1MBB); + } // Copy0MBB: // %FalseValue = ... 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Hexagon/BitTracker.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Hexagon/BitTracker.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Hexagon/BitTracker.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Hexagon/BitTracker.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -181,8 +181,8 @@ } // end namespace llvm void BitTracker::print_cells(raw_ostream &OS) const { - for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) - dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; + for (const std::pair P : Map) + dbgs() << PrintReg(P.first, &ME.TRI) << " -> " << P.second << "\n"; } BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) @@ -830,18 +830,16 @@ << " cell: " << ME.getCell(RU, Map) << "\n"; } dbgs() << "Outputs:\n"; - for (CellMapType::iterator I = ResMap.begin(), E = ResMap.end(); - I != E; ++I) { - RegisterRef RD(I->first); - dbgs() << " " << PrintReg(I->first, &ME.TRI) << " cell: " + for (const std::pair &P : ResMap) { + RegisterRef RD(P.first); + dbgs() << " " << PrintReg(P.first, &ME.TRI) << " cell: " << ME.getCell(RD, ResMap) << "\n"; } } // Iterate over all definitions of the instruction, and update the // cells accordingly. - for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { // Visit register defs only. if (!MO.isReg() || !MO.isDef()) continue; @@ -926,14 +924,11 @@ ++It; } while (FallsThrough && It != End); - using succ_iterator = MachineBasicBlock::const_succ_iterator; - if (!DefaultToAll) { // Need to add all CFG successors that lead to EH landing pads. // There won't be explicit branches to these blocks, but they must // be processed. 
- for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { - const MachineBasicBlock *SB = *I; + for (const MachineBasicBlock *SB : B.successors()) { if (SB->isEHPad()) Targets.insert(SB); } @@ -944,33 +939,27 @@ Targets.insert(&*Next); } } else { - for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) - Targets.insert(*I); + for (const MachineBasicBlock *SB : B.successors()) + Targets.insert(SB); } - for (unsigned i = 0, n = Targets.size(); i < n; ++i) { - int TargetN = Targets[i]->getNumber(); - FlowQ.push(CFGEdge(ThisN, TargetN)); - } + for (const MachineBasicBlock *TB : Targets) + FlowQ.push(CFGEdge(ThisN, TB->getNumber())); } void BT::visitUsesOf(unsigned Reg) { if (Trace) dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; - using use_iterator = MachineRegisterInfo::use_nodbg_iterator; - - use_iterator End = MRI.use_nodbg_end(); - for (use_iterator I = MRI.use_nodbg_begin(Reg); I != End; ++I) { - MachineInstr *UseI = I->getParent(); - if (!InstrExec.count(UseI)) + for (const MachineInstr &UseI : MRI.use_nodbg_instructions(Reg)) { + if (!InstrExec.count(&UseI)) continue; - if (UseI->isPHI()) - visitPHI(*UseI); - else if (!UseI->isBranch()) - visitNonBranch(*UseI); + if (UseI.isPHI()) + visitPHI(UseI); + else if (!UseI.isBranch()) + visitNonBranch(UseI); else - visitBranchesFrom(*UseI); + visitBranchesFrom(UseI); } } @@ -993,8 +982,8 @@ (void)NME; assert((OME-OMB == NME-NMB) && "Substituting registers of different lengths"); - for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { - RegisterCell &RC = I->second; + for (std::pair &P : Map) { + RegisterCell &RC = P.second; for (uint16_t i = 0, w = RC.width(); i < w; ++i) { BitValue &V = RC[i]; if (V.Type != BitValue::Ref || V.RefI.Reg != OldRR.Reg) @@ -1045,10 +1034,9 @@ const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF); unsigned MaxBN = 0; - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - 
assert(I->getNumber() >= 0 && "Disconnected block"); - unsigned BN = I->getNumber(); + for (const MachineBasicBlock &B : MF) { + assert(B.getNumber() >= 0 && "Disconnected block"); + unsigned BN = B.getNumber(); if (BN > MaxBN) MaxBN = BN; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Hexagon/HexagonBitTracker.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Hexagon/HexagonBitTracker.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Hexagon/HexagonBitTracker.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Hexagon/HexagonBitTracker.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -60,12 +60,8 @@ // der the initial sequence of formal parameters that are known to be // passed via registers. unsigned InVirtReg, InPhysReg = 0; - const Function &F = *MF.getFunction(); - using arg_iterator = Function::const_arg_iterator; - - for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { - const Argument &Arg = *I; + for (const Argument &Arg : MF.getFunction()->args()) { Type *ATy = Arg.getType(); unsigned Width = 0; if (ATy->isIntegerTy()) @@ -190,8 +186,7 @@ unsigned NumDefs = 0; // Sanity verification: there should not be any defs with subregisters. - for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; NumDefs++; @@ -240,8 +235,7 @@ // checking what kind of operand a given instruction has individually // for each instruction, do it here. Global symbols as operands gene- // rally do not provide any useful information. 
- for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() || MO.isCPI()) return false; @@ -1254,11 +1248,8 @@ } unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { - using iterator = MachineRegisterInfo::livein_iterator; - - for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { - if (I->first == PReg) - return I->second; - } + for (std::pair P : MRI.liveins()) + if (P.first == PReg) + return P.second; return 0; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Hexagon/HexagonConstExtenders.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Hexagon/HexagonConstExtenders.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Hexagon/HexagonConstExtenders.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Hexagon/HexagonConstExtenders.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -404,6 +404,7 @@ using HCE = HexagonConstExtenders; + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { if (OR.Min > OR.Max) OS << '!'; @@ -418,6 +419,7 @@ const HexagonRegisterInfo &HRI; }; + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) { if (P.Rs.Reg != 0) OS << PrintReg(P.Rs.Reg, &P.HRI, P.Rs.Sub); @@ -433,6 +435,7 @@ const HexagonRegisterInfo &HRI; }; + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) { OS << "## " << (P.Ex.Neg ? 
"- " : "+ "); if (P.Ex.Rs.Reg != 0) @@ -450,12 +453,14 @@ const HexagonRegisterInfo &HRI; }; + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) { OS << '[' << P.ExtI.first << ", " << PrintExpr(P.ExtI.second, P.HRI) << ']'; return OS; } + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) { assert(ED.OpNum != -1u); const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent(); @@ -472,6 +477,7 @@ return OS; } + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) { switch (ER.Kind) { case MachineOperand::MO_Immediate: @@ -505,6 +511,7 @@ return OS; } + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) { OS << HCE::ExtRoot(EV) << " off:" << EV.Offset; return OS; @@ -517,6 +524,7 @@ const HexagonRegisterInfo &HRI; }; + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) { OS << "{\n"; for (const std::pair &Q : P.IMap) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Hexagon/RDFGraph.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Hexagon/RDFGraph.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Hexagon/RDFGraph.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Hexagon/RDFGraph.cpp 2017-10-17 14:41:12.000000000 +0000 @@ -913,8 +913,8 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); MachineBasicBlock &EntryB = *EA.Addr->getCode(); assert(EntryB.pred_empty() && "Function entry block has predecessors"); - for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) - LiveIns.insert(RegisterRef(I->first)); + for (std::pair P : MRI.liveins()) + LiveIns.insert(RegisterRef(P.first)); if (MRI.tracksLiveness()) { for (auto I : EntryB.liveins()) LiveIns.insert(RegisterRef(I.PhysReg, I.LaneMask)); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MicroMipsInstrFormats.td 
llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MicroMipsInstrFormats.td --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MicroMipsInstrFormats.td 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MicroMipsInstrFormats.td 2017-10-17 14:41:12.000000000 +0000 @@ -786,13 +786,14 @@ } class BC1F_FM_MM tf> : MMArch { + bits<3> fcc; bits<16> offset; bits<32> Inst; let Inst{31-26} = 0x10; let Inst{25-21} = tf; - let Inst{20-18} = 0x0; // cc + let Inst{20-18} = fcc; // cc let Inst{17-16} = 0x0; let Inst{15-0} = offset; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MicroMipsInstrFPU.td llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MicroMipsInstrFPU.td --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MicroMipsInstrFPU.td 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MicroMipsInstrFPU.td 2017-10-17 14:41:12.000000000 +0000 @@ -58,10 +58,16 @@ bits<3> fcc = 0; } -def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>, - BC1F_FM_MM<0x1c>, ISA_MICROMIPS32_NOT_MIPS32R6; -def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>, - BC1F_FM_MM<0x1d>, ISA_MICROMIPS32_NOT_MIPS32R6; +} + +let DecoderNamespace = "MicroMips" in { + def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>, + BC1F_FM_MM<0x1c>, ISA_MICROMIPS32_NOT_MIPS32R6; + def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>, + BC1F_FM_MM<0x1d>, ISA_MICROMIPS32_NOT_MIPS32R6; +} + +let isCodeGenOnly = 1 in { def CVT_W_S_MM : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>, ROUND_W_FM_MM<0, 0x24>, ISA_MICROMIPS; def ROUND_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MicroMipsInstrInfo.td llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MicroMipsInstrInfo.td --- 
llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MicroMipsInstrInfo.td 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MicroMipsInstrInfo.td 2017-10-17 14:41:12.000000000 +0000 @@ -1006,20 +1006,14 @@ // MicroMips arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// -def : MipsPat<(i32 immLi16:$imm), - (LI16_MM immLi16:$imm)>; - -let AdditionalPredicates = [InMicroMips] in -defm : MaterializeImms; - -let Predicates = [InMicroMips] in { +let AdditionalPredicates = [InMicroMips] in { def : MipsPat<(i32 immLi16:$imm), (LI16_MM immLi16:$imm)>; - def : MipsPat<(i32 immSExt16:$imm), - (ADDiu_MM ZERO, immSExt16:$imm)>; - def : MipsPat<(i32 immZExt16:$imm), - (ORi_MM ZERO, immZExt16:$imm)>; + defm : MaterializeImms; +} + +let Predicates = [InMicroMips] in { def : MipsPat<(not GPRMM16:$in), (NOT16_MM GPRMM16:$in)>; def : MipsPat<(not GPR32:$in), diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MipsInstrFPU.td llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MipsInstrFPU.td --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MipsInstrFPU.td 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MipsInstrFPU.td 2017-10-17 14:41:12.000000000 +0000 @@ -641,17 +641,17 @@ def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; -def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>, - BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6; -def BC1FL : MMRel, BC1XL_FT<"bc1fl", brtarget, II_BC1FL>, - BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6; -def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>, - BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6; -def BC1TL : MMRel, BC1XL_FT<"bc1tl", brtarget, II_BC1TL>, - BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6; +let AdditionalPredicates = [NotInMicroMips] in { + def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>, 
+ BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6; + def BC1FL : MMRel, BC1XL_FT<"bc1fl", brtarget, II_BC1FL>, + BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6; + def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>, + BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6; + def BC1TL : MMRel, BC1XL_FT<"bc1tl", brtarget, II_BC1TL>, + BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6; /// Floating Point Compare -let AdditionalPredicates = [NotInMicroMips] in { def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>, ISA_MIPS1_NOT_32R6_64R6 { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MipsInstrInfo.td llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MipsInstrInfo.td --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/Mips/MipsInstrInfo.td 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/Mips/MipsInstrInfo.td 2017-10-17 14:41:12.000000000 +0000 @@ -1222,6 +1222,20 @@ (N->getZExtValue() <= 64); }]>; +def ORiPred : PatLeaf<(imm), [{ + return isUInt<16>(N->getZExtValue()) && !isInt<16>(N->getSExtValue()); +}], LO16>; + +def LUiPred : PatLeaf<(imm), [{ + int64_t Val = N->getSExtValue(); + return !isInt<16>(Val) && isInt<32>(Val) && !(Val & 0xffff); +}]>; + +def LUiORiPred : PatLeaf<(imm), [{ + int64_t SVal = N->getSExtValue(); + return isInt<32>(SVal) && (SVal & 0xffff); +}]>; + // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. def addr : @@ -2716,15 +2730,20 @@ Instruction ADDiuOp, Instruction LUiOp, Instruction ORiOp> { -// Small immediates -def : MipsPat<(VT immSExt16:$imm), (ADDiuOp ZEROReg, imm:$imm)>; -def : MipsPat<(VT immZExt16:$imm), (ORiOp ZEROReg, imm:$imm)>; +// Constant synthesis previously relied on the ordering of the patterns below. +// By making the predicates they use non-overlapping, the patterns were +// reordered so that the effect of the newly introduced predicates can be +// observed. 
+ +// Arbitrary immediates +def : MipsPat<(VT LUiORiPred:$imm), (ORiOp (LUiOp (HI16 imm:$imm)), (LO16 imm:$imm))>; // Bits 32-16 set, sign/zero extended. -def : MipsPat<(VT immSExt32Low16Zero:$imm), (LUiOp (HI16 imm:$imm))>; +def : MipsPat<(VT LUiPred:$imm), (LUiOp (HI16 imm:$imm))>; -// Arbitrary immediates -def : MipsPat<(VT immSExt32:$imm), (ORiOp (LUiOp (HI16 imm:$imm)), (LO16 imm:$imm))>; +// Small immediates +def : MipsPat<(VT ORiPred:$imm), (ORiOp ZEROReg, imm:$imm)>; +def : MipsPat<(VT immSExt16:$imm), (ADDiuOp ZEROReg, imm:$imm)>; } let AdditionalPredicates = [NotInMicroMips] in diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCFrameLowering.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCFrameLowering.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCFrameLowering.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCFrameLowering.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -312,11 +312,9 @@ // Live in and live out values already must be in the mask, so don't bother // marking them. - for (MachineRegisterInfo::livein_iterator - I = MF->getRegInfo().livein_begin(), - E = MF->getRegInfo().livein_end(); I != E; ++I) { - unsigned RegNo = TRI->getEncodingValue(I->first); - if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. + for (std::pair LI : MF->getRegInfo().liveins()) { + unsigned RegNo = TRI->getEncodingValue(LI.first); + if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 
} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCInstrInfo.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCInstrInfo.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCInstrInfo.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCInstrInfo.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -260,6 +260,7 @@ switch (MI.getOpcode()) { default: return false; case PPC::EXTSW: + case PPC::EXTSW_32: case PPC::EXTSW_32_64: SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); @@ -2103,3 +2104,243 @@ int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { return PPC::getRecordFormOpcode(Opcode); } + +// This function returns true if the machine instruction +// always outputs a value by sign-extending a 32 bit value, +// i.e. 0 to 31-th bits are same as 32-th bit. +static bool isSignExtendingOp(const MachineInstr &MI) { + int Opcode = MI.getOpcode(); + if (Opcode == PPC::LI || Opcode == PPC::LI8 || + Opcode == PPC::LIS || Opcode == PPC::LIS8 || + Opcode == PPC::SRAW || Opcode == PPC::SRAWo || + Opcode == PPC::SRAWI || Opcode == PPC::SRAWIo || + Opcode == PPC::LWA || Opcode == PPC::LWAX || + Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 || + Opcode == PPC::LHA || Opcode == PPC::LHAX || + Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 || + Opcode == PPC::LBZ || Opcode == PPC::LBZX || + Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || + Opcode == PPC::LBZU || Opcode == PPC::LBZUX || + Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || + Opcode == PPC::LHZ || Opcode == PPC::LHZX || + Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || + Opcode == PPC::LHZU || Opcode == PPC::LHZUX || + Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || + Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo || + Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo || + Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 || + Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo || + Opcode == PPC::EXTSH8_32_64 || 
Opcode == PPC::EXTSW_32_64 || + Opcode == PPC::EXTSB8_32_64) + return true; + + if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33) + return true; + + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo) && + MI.getOperand(3).getImm() > 0 && + MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) + return true; + + return false; +} + +// This function returns true if the machine instruction +// always outputs zeros in higher 32 bits. +static bool isZeroExtendingOp(const MachineInstr &MI) { + int Opcode = MI.getOpcode(); + // The 16-bit immediate is sign-extended in li/lis. + // If the most significant bit is zero, all higher bits are zero. + if (Opcode == PPC::LI || Opcode == PPC::LI8 || + Opcode == PPC::LIS || Opcode == PPC::LIS8) { + int64_t Imm = MI.getOperand(1).getImm(); + if (((uint64_t)Imm & ~0x7FFFuLL) == 0) + return true; + } + + // We have some variations of rotate-and-mask instructions + // that clear higher 32-bits. + if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || + Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo || + Opcode == PPC::RLDICL_32_64) && + MI.getOperand(3).getImm() >= 32) + return true; + + if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) && + MI.getOperand(3).getImm() >= 32 && + MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm()) + return true; + + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo || + Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && + MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) + return true; + + // There are other instructions that clear higher 32-bits. 
+ if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo || + Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo || + Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 || + Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo || + Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo || + Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW || + Opcode == PPC::SLW || Opcode == PPC::SLWo || + Opcode == PPC::SRW || Opcode == PPC::SRWo || + Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || + Opcode == PPC::SLWI || Opcode == PPC::SLWIo || + Opcode == PPC::SRWI || Opcode == PPC::SRWIo || + Opcode == PPC::LWZ || Opcode == PPC::LWZX || + Opcode == PPC::LWZU || Opcode == PPC::LWZUX || + Opcode == PPC::LWBRX || Opcode == PPC::LHBRX || + Opcode == PPC::LHZ || Opcode == PPC::LHZX || + Opcode == PPC::LHZU || Opcode == PPC::LHZUX || + Opcode == PPC::LBZ || Opcode == PPC::LBZX || + Opcode == PPC::LBZU || Opcode == PPC::LBZUX || + Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || + Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 || + Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 || + Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || + Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || + Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || + Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || + Opcode == PPC::ANDIo || Opcode == PPC::ANDISo || + Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo || + Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo || + Opcode == PPC::MFVSRWZ) + return true; + + return false; +} + +// We limit the max depth to track incoming values of PHIs or binary ops +// (e.g. AND) to avoid exsessive cost. 
+const unsigned MAX_DEPTH = 1; + +bool +PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, + const unsigned Depth) const { + const MachineFunction *MF = MI.getParent()->getParent(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); + + switch (MI.getOpcode()) { + case PPC::COPY: { + unsigned SrcReg = MI.getOperand(1).getReg(); + + // In both ELFv1 and v2 ABI, method parameters and the return value + // are sign- or zero-extended. + if (MF->getSubtarget().isSVR4ABI()) { + const PPCFunctionInfo *FuncInfo = MF->getInfo(); + // We check the ZExt/SExt flags for a method parameter. + if (MI.getParent()->getBasicBlock() == + &MF->getFunction()->getEntryBlock()) { + unsigned VReg = MI.getOperand(0).getReg(); + if (MF->getRegInfo().isLiveIn(VReg)) + return SignExt ? FuncInfo->isLiveInSExt(VReg) : + FuncInfo->isLiveInZExt(VReg); + } + + // For a method return value, we check the ZExt/SExt flags in attribute. + // We assume the following code sequence for method call. + // ADJCALLSTACKDOWN 32, %R1, %R1 + // BL8_NOP ,... + // ADJCALLSTACKUP 32, 0, %R1, %R1 + // %vreg5 = COPY %X3; G8RC:%vreg5 + if (SrcReg == PPC::X3) { + const MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock::const_instr_iterator II = + MachineBasicBlock::const_instr_iterator(&MI); + if (II != MBB->instr_begin() && + (--II)->getOpcode() == PPC::ADJCALLSTACKUP) { + const MachineInstr &CallMI = *(--II); + if (CallMI.isCall() && CallMI.getOperand(0).isGlobal()) { + const Function *CalleeFn = + dyn_cast(CallMI.getOperand(0).getGlobal()); + if (!CalleeFn) + return false; + const IntegerType *IntTy = + dyn_cast(CalleeFn->getReturnType()); + const AttributeSet &Attrs = + CalleeFn->getAttributes().getRetAttributes(); + if (IntTy && IntTy->getBitWidth() <= 32) + return Attrs.hasAttribute(SignExt ? Attribute::SExt : + Attribute::ZExt); + } + } + } + } + + // If this is a copy from another register, we recursively check source. 
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI != NULL) + return isSignOrZeroExtended(*SrcMI, SignExt, Depth); + + return false; + } + + case PPC::ANDIo: + case PPC::ANDISo: + case PPC::ORI: + case PPC::ORIS: + case PPC::XORI: + case PPC::XORIS: + case PPC::ANDIo8: + case PPC::ANDISo8: + case PPC::ORI8: + case PPC::ORIS8: + case PPC::XORI8: + case PPC::XORIS8: { + // logical operation with 16-bit immediate does not change the upper bits. + // So, we track the operand register as we do for register copy. + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI != NULL) + return isSignOrZeroExtended(*SrcMI, SignExt, Depth); + + return false; + } + + // If all incoming values are sign-/zero-extended, + // the output of AND, OR, ISEL or PHI is also sign-/zero-extended. + case PPC::AND: + case PPC::AND8: + case PPC::OR: + case PPC::OR8: + case PPC::ISEL: + case PPC::PHI: { + if (Depth >= MAX_DEPTH) + return false; + + // The input registers for PHI are operand 1, 3, ... + // The input registers for others are operand 1 and 2. 
+ unsigned E = 3, D = 1; + if (MI.getOpcode() == PPC::PHI) { + E = MI.getNumOperands(); + D = 2; + } + + for (unsigned I = 1; I != E; I += D) { + if (MI.getOperand(I).isReg()) { + unsigned SrcReg = MI.getOperand(I).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1)) + return false; + } + else + return false; + } + return true; + } + + default: + return SignExt?isSignExtendingOp(MI): + isZeroExtendingOp(MI); + } + return false; +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCInstrInfo.h llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCInstrInfo.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCInstrInfo.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCInstrInfo.h 2017-10-17 14:41:13.000000000 +0000 @@ -293,6 +293,21 @@ } const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; static int getRecordFormOpcode(unsigned Opcode); + + bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, + const unsigned PhiDepth) const; + + /// Return true if the output of the instruction is always a sign-extended, + /// i.e. 0 to 31-th bits are same as 32-th bit. + bool isSignExtended(const MachineInstr &MI, const unsigned depth = 0) const { + return isSignOrZeroExtended(MI, true, depth); + } + + /// Return true if the output of the instruction is always zero-extended, + /// i.e. 
0 to 31-th bits are all zeros + bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const { + return isSignOrZeroExtended(MI, false, depth); + } }; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCISelLowering.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCISelLowering.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCISelLowering.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCISelLowering.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -3618,6 +3618,7 @@ if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; @@ -3652,6 +3653,7 @@ break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { @@ -3688,6 +3690,7 @@ // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) @@ -3733,6 +3736,7 @@ // since otherwise we never run out of FPRs before running out // of GPRs. 
unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -43,3 +43,17 @@ "func_toc" + Twine(MF.getFunctionNumber())); } + +bool PPCFunctionInfo::isLiveInSExt(unsigned VReg) const { + for (const std::pair &LiveIn : LiveInAttrs) + if (LiveIn.first == VReg) + return LiveIn.second.isSExt(); + return false; +} + +bool PPCFunctionInfo::isLiveInZExt(unsigned VReg) const { + for (const std::pair &LiveIn : LiveInAttrs) + if (LiveIn.first == VReg) + return LiveIn.second.isZExt(); + return false; +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCMachineFunctionInfo.h llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCMachineFunctionInfo.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCMachineFunctionInfo.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCMachineFunctionInfo.h 2017-10-17 14:41:13.000000000 +0000 @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetCallingConv.h" namespace llvm { @@ -113,6 +114,10 @@ /// copies bool IsSplitCSR = false; + /// We keep track attributes for each live-in virtual registers + /// to use SExt/ZExt flags in later optimization. 
+ std::vector> LiveInAttrs; + public: explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {} @@ -175,6 +180,19 @@ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } + /// This function associates attributes for each live-in virtual register. + void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags) { + LiveInAttrs.push_back(std::make_pair(VReg, Flags)); + } + + /// This function returns true if the spesified vreg is + /// a live-in register and sign-extended. + bool isLiveInSExt(unsigned VReg) const; + + /// This function returns true if the spesified vreg is + /// a live-in register and zero-extended. + bool isLiveInZExt(unsigned VReg) const; + int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCMIPeephole.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCMIPeephole.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/PowerPC/PPCMIPeephole.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/PowerPC/PPCMIPeephole.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -29,14 +29,27 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" #include "MCTargetDesc/PPCPredicates.h" using namespace llvm; #define DEBUG_TYPE "ppc-mi-peepholes" +STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions"); +STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions"); STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI"); +static cl::opt + EnableSExtElimination("ppc-eliminate-signext", + cl::desc("enable elimination of sign-extensions"), + cl::init(true), cl::Hidden); + +static cl::opt + EnableZExtElimination("ppc-eliminate-zeroext", + cl::desc("enable elimination of 
zero-extensions"), + cl::init(true), cl::Hidden); + namespace llvm { void initializePPCMIPeepholePass(PassRegistry&); } @@ -110,6 +123,59 @@ return MRI->getVRegDef(Reg); } +// This function returns number of known zero bits in output of MI +// starting from the most significant bit. +static unsigned +getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) { + unsigned Opcode = MI->getOpcode(); + if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || + Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo) + return MI->getOperand(3).getImm(); + + if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) && + MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm()) + return MI->getOperand(3).getImm(); + + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo || + Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && + MI->getOperand(3).getImm() <= MI->getOperand(4).getImm()) + return 32 + MI->getOperand(3).getImm(); + + if (Opcode == PPC::ANDIo) { + uint16_t Imm = MI->getOperand(2).getImm(); + return 48 + countLeadingZeros(Imm); + } + + if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo || + Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo || + Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8) + // The result ranges from 0 to 32. + return 58; + + if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo || + Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo) + // The result ranges from 0 to 64. + return 57; + + if (Opcode == PPC::LHZ || Opcode == PPC::LHZX || + Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || + Opcode == PPC::LHZU || Opcode == PPC::LHZUX || + Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8) + return 48; + + if (Opcode == PPC::LBZ || Opcode == PPC::LBZX || + Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || + Opcode == PPC::LBZU || Opcode == PPC::LBZUX || + Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8) + return 56; + + if (TII->isZeroExtended(*MI)) + return 32; + + return 0; +} + // Perform peephole optimizations. 
bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; @@ -367,6 +433,156 @@ } break; } + case PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: { + if (!EnableSExtElimination) break; + unsigned NarrowReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); + // If we've used a zero-extending load that we will sign-extend, + // just do a sign-extending load. + if (SrcMI->getOpcode() == PPC::LHZ || + SrcMI->getOpcode() == PPC::LHZX) { + if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg())) + break; + auto is64Bit = [] (unsigned Opcode) { + return Opcode == PPC::EXTSH8; + }; + auto isXForm = [] (unsigned Opcode) { + return Opcode == PPC::LHZX; + }; + auto getSextLoadOp = [] (bool is64Bit, bool isXForm) { + if (is64Bit) + if (isXForm) return PPC::LHAX8; + else return PPC::LHA8; + else + if (isXForm) return PPC::LHAX; + else return PPC::LHA; + }; + unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()), + isXForm(SrcMI->getOpcode())); + DEBUG(dbgs() << "Zero-extending load\n"); + DEBUG(SrcMI->dump()); + DEBUG(dbgs() << "and sign-extension\n"); + DEBUG(MI.dump()); + DEBUG(dbgs() << "are merged into sign-extending load\n"); + SrcMI->setDesc(TII->get(Opc)); + SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + NumEliminatedSExt++; + } + break; + } + case PPC::EXTSW: + case PPC::EXTSW_32: + case PPC::EXTSW_32_64: { + if (!EnableSExtElimination) break; + unsigned NarrowReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); + // If we've used a zero-extending load that we will sign-extend, + // just do a sign-extending load. 
+ if (SrcMI->getOpcode() == PPC::LWZ || + SrcMI->getOpcode() == PPC::LWZX) { + if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg())) + break; + auto is64Bit = [] (unsigned Opcode) { + return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64; + }; + auto isXForm = [] (unsigned Opcode) { + return Opcode == PPC::LWZX; + }; + auto getSextLoadOp = [] (bool is64Bit, bool isXForm) { + if (is64Bit) + if (isXForm) return PPC::LWAX; + else return PPC::LWA; + else + if (isXForm) return PPC::LWAX_32; + else return PPC::LWA_32; + }; + unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()), + isXForm(SrcMI->getOpcode())); + DEBUG(dbgs() << "Zero-extending load\n"); + DEBUG(SrcMI->dump()); + DEBUG(dbgs() << "and sign-extension\n"); + DEBUG(MI.dump()); + DEBUG(dbgs() << "are merged into sign-extending load\n"); + SrcMI->setDesc(TII->get(Opc)); + SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + NumEliminatedSExt++; + } else if (MI.getOpcode() == PPC::EXTSW_32_64 && + TII->isSignExtended(*SrcMI)) { + // We can eliminate EXTSW if the input is known to be already + // sign-extended. + DEBUG(dbgs() << "Removing redundant sign-extension\n"); + unsigned TmpReg = + MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), + TmpReg); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG), + MI.getOperand(0).getReg()) + .addReg(TmpReg) + .addReg(NarrowReg) + .addImm(PPC::sub_32); + ToErase = &MI; + Simplified = true; + NumEliminatedSExt++; + } + break; + } + case PPC::RLDICL: { + // We can eliminate RLDICL (e.g. for zero-extension) + // if all bits to clear are already zero in the input. + // This code assume following code sequence for zero-extension. 
+ // %vreg6 = COPY %vreg5:sub_32; (optional) + // %vreg8 = IMPLICIT_DEF; + // %vreg7 = INSERT_SUBREG %vreg8, %vreg6, sub_32; + if (!EnableZExtElimination) break; + + if (MI.getOperand(2).getImm() != 0) + break; + + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG && + SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg())) + break; + + MachineInstr *ImpDefMI, *SubRegMI; + ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg()); + if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break; + + SrcMI = SubRegMI; + if (SubRegMI->getOpcode() == PPC::COPY) { + unsigned CopyReg = SubRegMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(CopyReg)) + SrcMI = MRI->getVRegDef(CopyReg); + } + + unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII); + if (MI.getOperand(3).getImm() <= KnownZeroCount) { + DEBUG(dbgs() << "Removing redundant zero-extension\n"); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .addReg(SrcReg); + ToErase = &MI; + Simplified = true; + NumEliminatedZExt++; + } + break; + } // TODO: Any instruction that has an immediate form fed only by a PHI // whose operands are all load immediate can be folded away. 
We currently diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/AsmParser/X86AsmParser.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/AsmParser/X86AsmParser.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/AsmParser/X86AsmParser.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -2330,7 +2330,6 @@ } } - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); // Determine whether this is an instruction prefix. // FIXME: @@ -2340,22 +2339,48 @@ // lock addq %rax, %rbx ; Destination operand must be of memory type // xacquire ; xacquire must be accompanied by 'lock' bool isPrefix = StringSwitch(Name) - .Cases("lock", - "rep", "repe", - "repz", "repne", - "repnz", "rex64", - "data32", "data16", true) - .Cases("xacquire", "xrelease", true) - .Cases("acquire", "release", isParsingIntelSyntax()) - .Default(false); + .Cases("rex64", "data32", "data16", true) + .Cases("xacquire", "xrelease", true) + .Cases("acquire", "release", isParsingIntelSyntax()) + .Default(false); + + auto isLockRepeatPrefix = [](StringRef N) { + return StringSwitch(N) + .Cases("lock", "rep", "repe", "repz", "repne", "repnz", true) + .Default(false); + }; bool CurlyAsEndOfStatement = false; + + unsigned Flags = X86::IP_NO_PREFIX; + while (isLockRepeatPrefix(Name.lower())) { + unsigned Prefix = + StringSwitch(Name) + .Cases("lock", "lock", X86::IP_HAS_LOCK) + .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT) + .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE) + .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible) + Flags |= Prefix; + Name = Parser.getTok().getString(); + Parser.Lex(); // eat the prefix + // Hack: we could have something like + // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl" + while (Name.startswith(";") || Name.startswith("\n") || + Name.startswith("\t") || Name.startswith("/")) { + Name = 
Parser.getTok().getString(); + Parser.Lex(); // go to next prefix or instr + } + } + + if (Flags) + PatchedName = Name; + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + // This does the actual operand parsing. Don't parse any more if we have a // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we // just want to parse the "lock" as the first instruction and the "incl" as // the next one. if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { - // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) Operands.push_back(X86Operand::CreateToken("*", consumeToken())); @@ -2593,6 +2618,8 @@ } } + if (Flags) + Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc)); return false; } @@ -2660,6 +2687,16 @@ return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm); } +static unsigned getPrefixes(OperandVector &Operands) { + unsigned Result = 0; + X86Operand &Prefix = static_cast(*Operands.back()); + if (Prefix.isPrefix()) { + Result = Prefix.getPrefix(); + Operands.pop_back(); + } + return Result; +} + bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2674,8 +2711,13 @@ MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); bool WasOriginallyInvalidOperand = false; + unsigned Prefixes = getPrefixes(Operands); + MCInst Inst; + if (Prefixes) + Inst.setFlags(Prefixes); + // First, try a direct match. switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax())) { @@ -2840,12 +2882,16 @@ StringRef Mnemonic = Op.getToken(); SMRange EmptyRange = None; StringRef Base = Op.getToken(); + unsigned Prefixes = getPrefixes(Operands); // First, handle aliases that expand to multiple instructions. MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); MCInst Inst; + if (Prefixes) + Inst.setFlags(Prefixes); + // Find one unsized memory operand, if present. 
X86Operand *UnsizedMemOp = nullptr; for (const auto &Op : Operands) { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/AsmParser/X86Operand.h llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/AsmParser/X86Operand.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/AsmParser/X86Operand.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/AsmParser/X86Operand.h 2017-10-17 14:41:13.000000000 +0000 @@ -28,12 +28,7 @@ /// X86Operand - Instances of this class represent a parsed X86 machine /// instruction. struct X86Operand : public MCParsedAsmOperand { - enum KindTy { - Token, - Register, - Immediate, - Memory - } Kind; + enum KindTy { Token, Register, Immediate, Memory, Prefix } Kind; SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; @@ -50,6 +45,10 @@ unsigned RegNo; }; + struct PrefOp { + unsigned Prefixes; + }; + struct ImmOp { const MCExpr *Val; }; @@ -73,6 +72,7 @@ struct RegOp Reg; struct ImmOp Imm; struct MemOp Mem; + struct PrefOp Pref; }; X86Operand(KindTy K, SMLoc Start, SMLoc End) @@ -111,6 +111,11 @@ return Reg.RegNo; } + unsigned getPrefix() const { + assert(Kind == Prefix && "Invalid access!"); + return Pref.Prefixes; + } + const MCExpr *getImm() const { assert(Kind == Immediate && "Invalid access!"); return Imm.Val; @@ -387,6 +392,7 @@ return isMemOffs() && Mem.ModeSize == 64 && (!Mem.Size || Mem.Size == 64); } + bool isPrefix() const { return Kind == Prefix; } bool isReg() const override { return Kind == Register; } bool isGR32orGR64() const { @@ -509,6 +515,13 @@ return Res; } + static std::unique_ptr + CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc) { + auto Res = llvm::make_unique(Prefix, StartLoc, EndLoc); + Res->Pref.Prefixes = Prefixes; + return Res; + } + static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc) { auto Res = llvm::make_unique(Immediate, StartLoc, EndLoc); diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/Disassembler/X86Disassembler.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/Disassembler/X86Disassembler.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/Disassembler/X86Disassembler.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/Disassembler/X86Disassembler.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -74,6 +74,7 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86DisassemblerDecoder.h" #include "llvm/MC/MCContext.h" @@ -232,7 +233,24 @@ return Fail; } else { Size = InternalInstr.length; - return (!translateInstruction(Instr, InternalInstr, this)) ? Success : Fail; + bool Ret = translateInstruction(Instr, InternalInstr, this); + if (!Ret) { + unsigned Flags = X86::IP_NO_PREFIX; + if (InternalInstr.hasAdSize) + Flags |= X86::IP_HAS_AD_SIZE; + if (!InternalInstr.mandatoryPrefix) { + if (InternalInstr.hasOpSize) + Flags |= X86::IP_HAS_OP_SIZE; + if (InternalInstr.repeatPrefix == 0xf2) + Flags |= X86::IP_HAS_REPEAT_NE; + else if (InternalInstr.repeatPrefix == 0xf3 && + // It should not be 'pause' f3 90 + InternalInstr.opcode != 0x90) + Flags |= X86::IP_HAS_REPEAT; + } + Instr.setFlags(Flags); + } + return (!Ret) ? Success : Fail; } } @@ -315,12 +333,12 @@ unsigned baseRegNo; if (insn.mode == MODE_64BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; + baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; else if (insn.mode == MODE_32BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; + baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; else { assert(insn.mode == MODE_16BIT); - baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; + baseRegNo = insn.hasAdSize ? 
X86::ESI : X86::SI; } MCOperand baseReg = MCOperand::createReg(baseRegNo); mcInst.addOperand(baseReg); @@ -340,12 +358,12 @@ unsigned baseRegNo; if (insn.mode == MODE_64BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; + baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; else if (insn.mode == MODE_32BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; + baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; else { assert(insn.mode == MODE_16BIT); - baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; + baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; } MCOperand baseReg = MCOperand::createReg(baseRegNo); mcInst.addOperand(baseReg); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -277,38 +277,44 @@ insn->dlog(insn->dlogArg, buffer); } -/* - * setPrefixPresent - Marks that a particular prefix is present at a particular - * location. - * - * @param insn - The instruction to be marked as having the prefix. - * @param prefix - The prefix that is present. - * @param location - The location where the prefix is located (in the address - * space of the instruction's reader). - */ -static void setPrefixPresent(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ - insn->prefixPresent[prefix] = 1; - insn->prefixLocations[prefix] = location; +static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { + if (insn->mode == MODE_64BIT) + return prefix >= 0x40 && prefix <= 0x4f; + return false; } /* - * isPrefixAtLocation - Queries an instruction to determine whether a prefix is - * present at a given location. 
+ * setPrefixPresent - Marks that a particular prefix is present as mandatory * - * @param insn - The instruction to be queried. - * @param prefix - The prefix. - * @param location - The location to query. - * @return - Whether the prefix is at that location. + * @param insn - The instruction to be marked as having the prefix. + * @param prefix - The prefix that is present. */ -static bool isPrefixAtLocation(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ - return insn->prefixPresent[prefix] == 1 && - insn->prefixLocations[prefix] == location; +static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) { + uint8_t nextByte; + switch (prefix) { + case 0xf2: + case 0xf3: + if (lookAtByte(insn, &nextByte)) + break; + // TODO: + // 1. There could be several 0x66 + // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then + // it's not mandatory prefix + // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need + // 0x0f exactly after it to be mandatory prefix + if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) + // The last of 0xf2 /0xf3 is mandatory prefix + insn->mandatoryPrefix = prefix; + insn->repeatPrefix = prefix; + break; + case 0x66: + if (lookAtByte(insn, &nextByte)) + break; + // 0x66 can't overwrite existing mandatory prefix and should be ignored + if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) + insn->mandatoryPrefix = prefix; + break; + } } /* @@ -322,19 +328,12 @@ */ static int readPrefixes(struct InternalInstruction* insn) { bool isPrefix = true; - bool prefixGroups[4] = { false }; - uint64_t prefixLocation; uint8_t byte = 0; uint8_t nextByte; - bool hasAdSize = false; - bool hasOpSize = false; - dbgprintf(insn, "readPrefixes()"); while (isPrefix) { - prefixLocation = insn->readerCursor; - /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ if (consumeByte(insn, &byte)) break; @@ -343,13 +342,10 @@ * If the byte is 
a LOCK/REP/REPNE prefix and not a part of the opcode, then * break and let it be disassembled as a normal "instruction". */ - if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK break; - if (insn->readerCursor - 1 == insn->startLocation - && (byte == 0xf2 || byte == 0xf3) - && !lookAtByte(insn, &nextByte)) - { + if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { /* * If the byte is 0xf2 or 0xf3, and any of the following conditions are * met: @@ -357,39 +353,41 @@ * - it is followed by an xchg instruction * then it should be disassembled as a xacquire/xrelease not repne/rep. */ - if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) || - ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) + if (((nextByte == 0xf0) || + ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { insn->xAcquireRelease = true; + if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support + break; + } /* * Also if the byte is 0xf3, and the following condition is met: * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or * "mov mem, imm" (opcode 0xc6/0xc7) instructions. * then it should be disassembled as an xrelease not rep. 
*/ - if (byte == 0xf3 && - (nextByte == 0x88 || nextByte == 0x89 || - nextByte == 0xc6 || nextByte == 0xc7)) + if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || + nextByte == 0xc6 || nextByte == 0xc7)) { insn->xAcquireRelease = true; - if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { - if (consumeByte(insn, &nextByte)) + if (nextByte != 0x90) // PAUSE instruction support + break; + } + if (isREX(insn, nextByte)) { + uint8_t nnextByte; + // Go to REX prefix after the current one + if (consumeByte(insn, &nnextByte)) return -1; - if (lookAtByte(insn, &nextByte)) + // We should be able to read next byte after REX prefix + if (lookAtByte(insn, &nnextByte)) return -1; unconsumeByte(insn); } - if (nextByte != 0x0f && nextByte != 0x90) - break; } switch (byte) { case 0xf0: /* LOCK */ case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP or REPE/REPZ */ - if (prefixGroups[0]) - dbgprintf(insn, "Redundant Group 1 prefix"); - prefixGroups[0] = true; - setPrefixPresent(insn, byte, prefixLocation); + setPrefixPresent(insn, byte); break; case 0x2e: /* CS segment override -OR- Branch not taken */ case 0x36: /* SS segment override -OR- Branch taken */ @@ -420,24 +418,15 @@ debug("Unhandled override"); return -1; } - if (prefixGroups[1]) - dbgprintf(insn, "Redundant Group 2 prefix"); - prefixGroups[1] = true; - setPrefixPresent(insn, byte, prefixLocation); + setPrefixPresent(insn, byte); break; case 0x66: /* Operand-size override */ - if (prefixGroups[2]) - dbgprintf(insn, "Redundant Group 3 prefix"); - prefixGroups[2] = true; - hasOpSize = true; - setPrefixPresent(insn, byte, prefixLocation); + insn->hasOpSize = true; + setPrefixPresent(insn, byte); break; case 0x67: /* Address-size override */ - if (prefixGroups[3]) - dbgprintf(insn, "Redundant Group 4 prefix"); - prefixGroups[3] = true; - hasAdSize = true; - setPrefixPresent(insn, byte, prefixLocation); + insn->hasAdSize = true; + setPrefixPresent(insn, byte); break; default: /* Not a prefix byte */ isPrefix = 
false; @@ -469,7 +458,6 @@ } else { unconsumeByte(insn); /* unconsume byte1 */ unconsumeByte(insn); /* unconsume byte */ - insn->necessaryPrefixLocation = insn->readerCursor - 2; } if (insn->vectorExtensionType == TYPE_EVEX) { @@ -505,13 +493,10 @@ return -1; } - if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) insn->vectorExtensionType = TYPE_VEX_3B; - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } else { + else unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } if (insn->vectorExtensionType == TYPE_VEX_3B) { insn->vectorExtensionPrefix[0] = byte; @@ -520,13 +505,12 @@ /* We simulate the REX prefix for simplicity's sake */ - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); - } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], @@ -540,26 +524,24 @@ return -1; } - if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) insn->vectorExtensionType = TYPE_VEX_2B; - } else { + else unconsumeByte(insn); - } if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->vectorExtensionPrefix[0] = byte; consumeByte(insn, &insn->vectorExtensionPrefix[1]); - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); - } switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { default: break; case VEX_PREFIX_66: - hasOpSize = true; + insn->hasOpSize = true; break; } @@ -575,13 +557,10 @@ return -1; } - if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. 
*/ + if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */ insn->vectorExtensionType = TYPE_XOP; - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } else { + else unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } if (insn->vectorExtensionType == TYPE_XOP) { insn->vectorExtensionPrefix[0] = byte; @@ -590,19 +569,18 @@ /* We simulate the REX prefix for simplicity's sake */ - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); - } switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { default: break; case VEX_PREFIX_66: - hasOpSize = true; + insn->hasOpSize = true; break; } @@ -610,51 +588,35 @@ insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], insn->vectorExtensionPrefix[2]); } - } else { - if (insn->mode == MODE_64BIT) { - if ((byte & 0xf0) == 0x40) { - uint8_t opcodeByte; - - if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { - dbgprintf(insn, "Redundant REX prefix"); - return -1; - } - - insn->rexPrefix = byte; - insn->necessaryPrefixLocation = insn->readerCursor - 2; - - dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - } + } else if (isREX(insn, byte)) { + if (lookAtByte(insn, &nextByte)) + return -1; + insn->rexPrefix = byte; + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); + } else + unconsumeByte(insn); if (insn->mode == MODE_16BIT) { - insn->registerSize = (hasOpSize ? 4 : 2); - insn->addressSize = (hasAdSize ? 4 : 2); - insn->displacementSize = (hasAdSize ? 4 : 2); - insn->immediateSize = (hasOpSize ? 
4 : 2); + insn->registerSize = (insn->hasOpSize ? 4 : 2); + insn->addressSize = (insn->hasAdSize ? 4 : 2); + insn->displacementSize = (insn->hasAdSize ? 4 : 2); + insn->immediateSize = (insn->hasOpSize ? 4 : 2); } else if (insn->mode == MODE_32BIT) { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 2 : 4); - insn->displacementSize = (hasAdSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (insn->hasOpSize ? 2 : 4); + insn->addressSize = (insn->hasAdSize ? 2 : 4); + insn->displacementSize = (insn->hasAdSize ? 2 : 4); + insn->immediateSize = (insn->hasOpSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; - insn->addressSize = (hasAdSize ? 4 : 8); + insn->addressSize = (insn->hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; } else { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = (hasOpSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (insn->hasOpSize ? 2 : 4); + insn->addressSize = (insn->hasAdSize ? 4 : 8); + insn->displacementSize = (insn->hasOpSize ? 2 : 4); + insn->immediateSize = (insn->hasOpSize ? 2 : 4); } } @@ -758,7 +720,10 @@ insn->opcodeType = TWOBYTE; } - } + } else if (insn->mandatoryPrefix) + // The opcode with mandatory prefix must start with opcode escape. + // If not it's legacy repeat prefix + insn->mandatoryPrefix = 0; /* * At this point we have consumed the full opcode. 
@@ -950,15 +915,38 @@ } else { return -1; } - } else { - if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + } else if (!insn->mandatoryPrefix) { + // If we don't have mandatory prefix we should use legacy prefixes here + if (insn->hasOpSize && (insn->mode != MODE_16BIT)) attrMask |= ATTR_OPSIZE; - else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) + if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; - else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XS; - else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) + if (insn->opcodeType == ONEBYTE) { + if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90)) + // Special support for PAUSE + attrMask |= ATTR_XS; + } else { + if (insn->repeatPrefix == 0xf2) + attrMask |= ATTR_XD; + else if (insn->repeatPrefix == 0xf3) + attrMask |= ATTR_XS; + } + } else { + switch (insn->mandatoryPrefix) { + case 0xf2: attrMask |= ATTR_XD; + break; + case 0xf3: + attrMask |= ATTR_XS; + break; + case 0x66: + if (insn->mode != MODE_16BIT) + attrMask |= ATTR_OPSIZE; + break; + case 0x67: + attrMask |= ATTR_ADSIZE; + break; + } } if (insn->rexPrefix & 0x08) @@ -977,8 +965,7 @@ * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes */ - if (insn->mode == MODE_64BIT && - isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { + if ((insn->mode == MODE_64BIT) && insn->hasOpSize) { switch (insn->opcode) { case 0xE8: case 0xE9: @@ -1058,9 +1045,9 @@ */ if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) { /* Make sure we observed the prefixes in any position. */ - if (insn->prefixPresent[0x67]) + if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; - if (insn->prefixPresent[0x66]) + if (insn->hasOpSize) attrMask |= ATTR_OPSIZE; /* In 16-bit, invert the attributes. 
*/ @@ -1075,7 +1062,7 @@ return 0; } - if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && + if ((insn->mode == MODE_16BIT || insn->hasOpSize) && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that @@ -1108,7 +1095,7 @@ specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) && - (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) { + (insn->mode == MODE_16BIT) ^ insn->hasOpSize) { insn->instructionID = instructionIDWithOpsize; insn->spec = specifierForUID(instructionIDWithOpsize); } else { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h 2017-10-17 14:41:13.000000000 +0000 @@ -546,24 +546,26 @@ // Prefix state - // 1 if the prefix byte corresponding to the entry is present; 0 if not - uint8_t prefixPresent[0x100]; - // contains the location (for use with the reader) of the prefix byte - uint64_t prefixLocations[0x100]; + // The possible mandatory prefix + uint8_t mandatoryPrefix; // The value of the vector extension prefix(EVEX/VEX/XOP), if present uint8_t vectorExtensionPrefix[4]; // The type of the vector extension prefix VectorExtensionType vectorExtensionType; // The value of the REX prefix, if present uint8_t rexPrefix; - // The location where a mandatory prefix would have to be (i.e., right before - // the opcode, or right before the REX prefix if one is present). 
- uint64_t necessaryPrefixLocation; // The segment override type SegmentOverride segmentOverride; // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease bool xAcquireRelease; + // Address-size override + bool hasAdSize; + // Operand-size override + bool hasOpSize; + // The repeat prefix if any + uint8_t repeatPrefix; + // Sizes of various critical pieces of data, in bytes uint8_t registerSize; uint8_t addressSize; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -50,8 +50,16 @@ HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); + unsigned Flags = MI->getFlags(); if (TSFlags & X86II::LOCK) OS << "\tlock\t"; + if (!(TSFlags & X86II::LOCK) && Flags & X86::IP_HAS_LOCK) + OS << "\tlock\n"; + + if (Flags & X86::IP_HAS_REPEAT_NE) + OS << "\trepne\n"; + else if (Flags & X86::IP_HAS_REPEAT) + OS << "\trep\n"; // Output CALLpcrel32 as "callq" in 64-bit mode. // In Intel annotation it's always emitted as "call". 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -43,6 +43,12 @@ if (TSFlags & X86II::LOCK) OS << "\tlock\n"; + unsigned Flags = MI->getFlags(); + if (Flags & X86::IP_HAS_REPEAT_NE) + OS << "\trepne\n"; + else if (Flags & X86::IP_HAS_REPEAT) + OS << "\trep\n"; + printInstruction(MI, OS); // Next always print the annotation. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/MCTargetDesc/X86BaseInfo.h llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/MCTargetDesc/X86BaseInfo.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/MCTargetDesc/X86BaseInfo.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/MCTargetDesc/X86BaseInfo.h 2017-10-17 14:41:13.000000000 +0000 @@ -51,6 +51,16 @@ TO_ZERO = 3, CUR_DIRECTION = 4 }; + + /// The constants to describe instr prefixes if there are + enum IPREFIXES { + IP_NO_PREFIX = 0, + IP_HAS_OP_SIZE = 1, + IP_HAS_AD_SIZE = 2, + IP_HAS_REPEAT_NE = 4, + IP_HAS_REPEAT = 8, + IP_HAS_LOCK = 16 + }; } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -1108,7 +1108,7 @@ EmitByte(0x66, CurByte, OS); // Emit the 
LOCK opcode prefix. - if (TSFlags & X86II::LOCK) + if (TSFlags & X86II::LOCK || MI.getFlags() & X86::IP_HAS_LOCK) EmitByte(0xF0, CurByte, OS); switch (TSFlags & X86II::OpPrefixMask) { @@ -1159,6 +1159,7 @@ unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MCII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; + unsigned Flags = MI.getFlags(); // Pseudo instructions don't get encoded. if ((TSFlags & X86II::FormMask) == X86II::Pseudo) @@ -1194,8 +1195,10 @@ MI, OS); // Emit the repeat opcode prefix as needed. - if (TSFlags & X86II::REP) + if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT) EmitByte(0xF3, CurByte, OS); + if (Flags & X86::IP_HAS_REPEAT_NE) + EmitByte(0xF2, CurByte, OS); // Emit the address size opcode prefix as needed. bool need_address_override; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86AsmPrinter.h llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86AsmPrinter.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86AsmPrinter.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86AsmPrinter.h 2017-10-17 14:41:13.000000000 +0000 @@ -135,6 +135,7 @@ bool doInitialization(Module &M) override { SMShadowTracker.reset(0); SM.reset(); + FM.reset(); return AsmPrinter::doInitialization(M); } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86EvexToVex.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86EvexToVex.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86EvexToVex.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86EvexToVex.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -14,7 +14,7 @@ /// corresponding VEX encoding which is usually shorter by 2 bytes. 
/// EVEX instructions may be encoded via the VEX prefix when the AVX-512 /// instruction has a corresponding AVX/AVX2 opcode and when it does not -/// use the xmm or the mask registers or xmm/ymm registers wuith indexes +/// use the xmm or the mask registers or xmm/ymm registers with indexes /// higher than 15. /// The pass applies code reduction on the generated code for AVX-512 instrs. // diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86ISelLowering.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86ISelLowering.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86ISelLowering.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86ISelLowering.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -3235,9 +3235,9 @@ if (CallConv == CallingConv::X86_RegCall || Fn->hasFnAttribute("no_caller_saved_registers")) { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end())) - MF.getRegInfo().disableCalleeSavedRegister(Pair.first); + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (std::pair Pair : MRI.liveins()) + MRI.disableCalleeSavedRegister(Pair.first); } return Chain; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86SchedSkylakeClient.td llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86SchedSkylakeClient.td --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86SchedSkylakeClient.td 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86SchedSkylakeClient.td 2017-10-17 14:41:13.000000000 +0000 @@ -307,3705 +307,3904 @@ // Remaining instrs. 
-def SKLWriteResGroup0 : SchedWriteRes<[SKLPort23]> { +def SKLWriteResGroup1 : SchedWriteRes<[SKLPort0]> { let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup0], (instregex "LDDQUrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "LD_F32m")>; -def: InstRW<[SKLWriteResGroup0], (instregex "LD_F64m")>; -def: InstRW<[SKLWriteResGroup0], (instregex "LD_F80m")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MMX_MOVD64from64rm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MMX_MOVD64rm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MMX_MOVD64to64rm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MMX_MOVQ64rm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOV(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOV64toPQIrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOV8rm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVAPDrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVAPSrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVDDUPrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVDI2PDIrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVDQArm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVDQUrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVNTDQArm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVSHDUPrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVSLDUPrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVSSrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVSX(16|32|64)rm16")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVSX(16|32|64)rm32")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVSX(16|32|64)rm8")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVUPDrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVUPSrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVZX(16|32|64)rm16")>; -def: InstRW<[SKLWriteResGroup0], (instregex "MOVZX(16|32|64)rm8")>; -def: InstRW<[SKLWriteResGroup0], (instregex "PREFETCHNTA")>; -def: 
InstRW<[SKLWriteResGroup0], (instregex "PREFETCHT0")>; -def: InstRW<[SKLWriteResGroup0], (instregex "PREFETCHT1")>; -def: InstRW<[SKLWriteResGroup0], (instregex "PREFETCHT2")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTF128")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTI128")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTSDYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTSSYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTSSrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VLDDQUYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VLDDQUrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOV64toPQIrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVAPDYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVAPDrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVAPSYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVAPSrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDDUPYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDDUPrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDI2PDIrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDQAYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDQArm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDQUYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDQUrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVNTDQAYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVNTDQArm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVQI2PQIrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSDrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSHDUPYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSHDUPrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSLDUPYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSLDUPrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSSrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVUPDYrm")>; -def: 
InstRW<[SKLWriteResGroup0], (instregex "VMOVUPDrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVUPSYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VMOVUPSrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VPBROADCASTDYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VPBROADCASTDrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VPBROADCASTQYrm")>; -def: InstRW<[SKLWriteResGroup0], (instregex "VPBROADCASTQrm")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDSBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDSWirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDUSBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDUSWirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PAVGBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PAVGWirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPEQBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPEQDirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPEQWirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPGTBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPGTDirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPGTWirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PMAXSWirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PMAXUBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PMINSWirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PMINUBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLDri")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLDrr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLQri")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLQrr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLWri")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLWrr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRADri")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRADrr")>; +def: InstRW<[SKLWriteResGroup1], 
(instregex "MMX_PSRAWri")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRAWrr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLDri")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLDrr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLQri")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLQrr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLWri")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLWrr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSUBSBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSUBSWirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSUBUSBirr")>; +def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSUBUSWirr")>; + +def SKLWriteResGroup2 : SchedWriteRes<[SKLPort1]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup2], (instregex "MMX_MASKMOVQ64")>; + +def SKLWriteResGroup3 : SchedWriteRes<[SKLPort5]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup3], (instregex "COMP_FST0r")>; +def: InstRW<[SKLWriteResGroup3], (instregex "COM_FST0r")>; +def: InstRW<[SKLWriteResGroup3], (instregex "INSERTPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_MOVD64rr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_MOVD64to64rr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PSHUFBrr64")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PSHUFWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKHBWirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKHDQirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKHWDirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKLBWirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKLDQirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKLWDirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex 
"MOV64toPQIrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVDDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVDI2PDIrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVHLPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVLHPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVSDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVSHDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVSLDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVUPDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVUPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PACKSSDWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PACKSSWBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PACKUSDWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PACKUSWBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PALIGNRrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PBLENDWrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXBDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXBQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXWQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXBDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXBQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXWQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSHUFBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSHUFDri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSHUFHWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSHUFLWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSLLDQri")>; +def: 
InstRW<[SKLWriteResGroup3], (instregex "PSRLDQri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKHBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKHDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKHQDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKHWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKLBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKLDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKLQDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKLWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "SHUFPDrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "SHUFPSrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "UCOM_FPr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "UCOM_Fr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "UNPCKHPDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "UNPCKHPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "UNPCKLPDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "UNPCKLPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VBROADCASTSSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VINSERTPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOV64toPQIrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVDDUPYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVDDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVDI2PDIrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVHLPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVLHPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSHDUPYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSHDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSLDUPYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSLDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPDYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPDrr(_REV?)")>; +def: 
InstRW<[SKLWriteResGroup3], (instregex "VMOVUPSYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSDWYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSDWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSWBYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSWBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPACKUSDWYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPACKUSDWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPACKUSWBYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPACKUSWBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPALIGNRYrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPALIGNRrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPBLENDWYrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPBLENDWrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPBROADCASTDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPBROADCASTQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPDYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPDri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPSYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPSYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPSri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXBDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXBQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXWQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXBDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex 
"VPMOVZXBQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXWQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFBYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFDYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFDri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFHWYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFHWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFLWYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFLWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLDQYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLDQri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLDQYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLDQri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHBWYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHDQYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHQDQYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHQDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHWDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLBWYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLDQYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLQDQYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLWDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex 
"VPUNPCKLWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VSHUFPDYrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VSHUFPDrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VSHUFPSYrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VSHUFPSrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKHPDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKHPDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKHPSYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKHPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKLPDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKLPSrr")>; + +def SKLWriteResGroup4 : SchedWriteRes<[SKLPort6]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup4], (instregex "JMP(16|32|64)r")>; + +def SKLWriteResGroup5 : SchedWriteRes<[SKLPort01]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup5], (instregex "PABSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PABSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PABSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PADDSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PADDSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PADDUSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PADDUSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PAVGBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PAVGWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPEQBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPEQDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPEQQrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPEQWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPGTBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPGTDrr")>; +def: 
InstRW<[SKLWriteResGroup5], (instregex "PCMPGTWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXUBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXUDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXUWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINUBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINUDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINUWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSIGNBrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSIGNDrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSIGNWrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSLLDri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSLLQri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSLLWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRADri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRAWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRLDri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRLQri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRLWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSUBSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSUBSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSUBUSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSUBUSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex 
"VPABSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDUSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDUSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDUSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDUSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPAVGBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPAVGBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPAVGWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPAVGWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQQYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQQrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSWrr")>; +def: InstRW<[SKLWriteResGroup5], 
(instregex "VPMAXUBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNBYrr256")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNBrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNDYrr256")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNDrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNWYrr256")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNWrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLDYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLDri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLQYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLQri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLVDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLVDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLVQYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLVQrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLWYri")>; +def: 
InstRW<[SKLWriteResGroup5], (instregex "VPSLLWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRADYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRADri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRAVDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRAVDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRAWYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRAWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLDYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLDri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLQYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLQri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLVDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLVDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLVQYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLVQrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLWYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBUSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBUSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBUSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBUSWrr")>; + +def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup6], (instregex "FINCSTP")>; +def: InstRW<[SKLWriteResGroup6], (instregex "FNOP")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PABSBrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PABSDrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex 
"MMX_PABSWrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PADDBirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PADDDirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PADDQirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PADDWirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PANDNirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PANDirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PORirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSUBBirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSUBDirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSUBQirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSUBWirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PXORirr")>; + +def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup7], (instregex "ADC(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADC(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADC8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADCX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADCX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADOX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADOX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTC(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTC(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTR(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTR(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTS(16|32|64)ri8")>; 
+def: InstRW<[SKLWriteResGroup7], (instregex "BTS(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CDQ")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CLAC")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVAE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVB(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVG(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVGE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVL(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVLE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNO(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNP(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNS(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVO(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVP(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CQO")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JAE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JAE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JA_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JA_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JBE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JBE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JB_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JB_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JGE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JGE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JG_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JG_4")>; +def: InstRW<[SKLWriteResGroup7], 
(instregex "JLE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JLE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JL_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JL_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JMP_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JMP_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNO_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNO_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNP_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNP_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNS_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNS_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JO_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JO_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JP_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JP_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JS_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JS_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "RORX32ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "RORX64ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SAR(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SAR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SAR8r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SAR8ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SARX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SARX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETAEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETBr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETGEr")>; +def: 
InstRW<[SKLWriteResGroup7], (instregex "SETGr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETLEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETLr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETNEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETNOr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETNPr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETNSr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETOr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETPr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETSr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHL(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHL(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHL8r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHL8ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHLX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHLX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHR(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHR8r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHR8ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHRX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHRX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "STAC")>; + +def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup8], (instregex "ANDN32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "ANDN64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSI32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSI64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSMSK32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSMSK64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSR32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSR64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex 
"BZHI32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BZHI64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "LEA(16|32|64)r")>; + +def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup9], (instregex "ANDNPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "ANDNPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "ANDPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "ANDPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "BLENDPDrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "BLENDPSrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVAPDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVAPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVDQArr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVDQUrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVPQI2QIrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVSSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "ORPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "ORPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PADDBrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PADDDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PADDQrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PADDWrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PANDNrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PANDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PORrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PSUBBrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PSUBDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PSUBQrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PSUBWrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PXORrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDNPDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex 
"VANDNPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDNPSYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDNPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDPDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDPSYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPDYrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPDrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPSYrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPSrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPDYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPSYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQAYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQArr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQUYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQUrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVPQI2QIrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVSSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVZPQILo2PQIrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VORPDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VORPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VORPSYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VORPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDBYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDBrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDQYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDQrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex 
"VPADDWYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDWrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPANDNYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPANDNrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPANDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPANDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPBLENDDYrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPBLENDDrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPORYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPORrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBBYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBBrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBQYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBQrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBWYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBWrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPXORYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPXORrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VXORPDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VXORPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VXORPSYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VXORPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "XORPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "XORPSrr")>; + +def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup10], (instregex "ADD(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex 
"AND(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CBW")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CLC")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMC")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CWDE")>; +def: InstRW<[SKLWriteResGroup10], (instregex "DEC(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "DEC8r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "INC(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "INC8r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "LAHF")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV8ri(_alt?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr16")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr32")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVZX(16|32|64)rr16")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVZX(16|32|64)rr8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NEG(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NEG8r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NOOP")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NOT(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NOT8r")>; +def: 
InstRW<[SKLWriteResGroup10], (instregex "OR(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SAHF")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SGDT64m")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SIDT64m")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SLDT64m")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SMSW16m")>; +def: InstRW<[SKLWriteResGroup10], (instregex "STC")>; +def: InstRW<[SKLWriteResGroup10], (instregex "STRm")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SYSCALL")>; +def: InstRW<[SKLWriteResGroup10], (instregex "TEST(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup10], (instregex "TEST8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "TEST8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "TEST8rr")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XCHG(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR8rr(_REV?)")>; + +def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> { + let Latency = 1; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm")>; +def: InstRW<[SKLWriteResGroup11], (instregex 
"MMX_MOVD64from64rm")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MMX_MOVD64mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MMX_MOVNTQmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MMX_MOVQ64mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOV(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOV8mi")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOV8mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVAPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVAPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVDQAmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVDQUmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVHPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVHPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVLPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVLPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTDQmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTI_64mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVPDI2DImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVPQI2QImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVPQIto64mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVSSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVUPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVUPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP32m")>; +def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP64m")>; +def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP80m")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VEXTRACTF128mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VEXTRACTI128mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVAPDYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVAPDmr")>; +def: 
InstRW<[SKLWriteResGroup11], (instregex "VMOVAPSYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVAPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVDQAYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVDQAmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVDQUYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVDQUmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVHPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVHPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVLPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVLPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTDQYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTDQmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTPDYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTPSYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVPDI2DImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVPQI2QImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVPQIto64mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVSDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVSSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVUPDYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVUPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVUPSYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVUPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMPTRSTm")>; + +def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> { + let Latency = 2; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup12], (instregex "COMISDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "COMISSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64grr")>; +def: 
InstRW<[SKLWriteResGroup12], (instregex "MMX_PMOVMSKBrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MOVMSKPDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MOVMSKPSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MOVPDI2DIrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MOVPQIto64rr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "PMOVMSKBrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "UCOMISDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "UCOMISSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VCOMISDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VCOMISSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVMSKPDYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVMSKPDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVMSKPSYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVMSKPSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVPDI2DIrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVPQIto64rr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VPMOVMSKBYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VPMOVMSKBrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VTESTPDYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VTESTPDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VTESTPSYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VTESTPSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VUCOMISDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VUCOMISSrr")>; -def SKLWriteResGroup1 : SchedWriteRes<[SKLPort4,SKLPort237]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup1], (instregex "FBSTPm")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MMX_MOVD64from64rm")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MMX_MOVD64mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MMX_MOVNTQmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MMX_MOVQ64mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex 
"MOV(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOV8mi")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOV8mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVAPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVAPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVDQAmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVDQUmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVHPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVHPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVLPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVLPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTDQmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTI_64mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTImr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVPDI2DImr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVPQI2QImr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVPQIto64mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVSSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVUPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "MOVUPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "ST_FP32m")>; -def: InstRW<[SKLWriteResGroup1], (instregex "ST_FP64m")>; -def: InstRW<[SKLWriteResGroup1], (instregex "ST_FP80m")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VEXTRACTF128mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VEXTRACTI128mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVAPDYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVAPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVAPSYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVAPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVDQAYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVDQAmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex 
"VMOVDQUYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVDQUmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVHPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVHPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVLPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVLPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTDQYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTDQmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTPDYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTPSYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVPDI2DImr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVPQI2QImr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVPQIto64mr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVSDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVSSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVUPDYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVUPDmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVUPSYmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMOVUPSmr")>; -def: InstRW<[SKLWriteResGroup1], (instregex "VMPTRSTm")>; - -def SKLWriteResGroup2 : SchedWriteRes<[SKLPort0]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PADDSBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PADDSWirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PADDUSBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PADDUSWirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PAVGBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PAVGWirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPEQBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPEQDirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex 
"MMX_PCMPEQWirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPGTBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPGTDirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPGTWirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PMAXSWirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PMAXUBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PMINSWirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PMINUBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLDri")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLDrr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLQri")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLQrr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLWri")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLWrr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRADri")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRADrr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRAWri")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRAWrr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLDri")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLDrr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLQri")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLQrr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLWri")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLWrr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSUBSBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSUBSWirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSUBUSBirr")>; -def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSUBUSWirr")>; - -def SKLWriteResGroup3 : SchedWriteRes<[SKLPort1]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup3], (instregex "MMX_MASKMOVQ64")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PABSBrr")>; -def: 
InstRW<[SKLWriteResGroup3], (instregex "PABSDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PABSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PADDSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PADDSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PADDUSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PADDUSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PAVGBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PAVGWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PCMPEQBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PCMPEQDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PCMPEQQrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PCMPEQWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PCMPGTBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PCMPGTDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PCMPGTWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMAXSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMAXSDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMAXSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMAXUBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMAXUDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMAXUWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMINSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMINSDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMINSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMINUBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMINUDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PMINUWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSIGNBrr128")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSIGNDrr128")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSIGNWrr128")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSLLDri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSLLQri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSLLWri")>; -def: InstRW<[SKLWriteResGroup3], (instregex 
"PSRADri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSRAWri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSRLDri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSRLQri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSRLWri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSUBSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSUBSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSUBUSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "PSUBUSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPABSBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPABSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPABSDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPABSDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPABSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPABSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPADDSBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPADDSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPADDSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPADDSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPADDUSBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPADDUSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPADDUSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPADDUSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPAVGBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPAVGBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPAVGWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPAVGWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQQYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQQrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQWYrr")>; 
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNBYrr256")>; -def: InstRW<[SKLWriteResGroup3], (instregex 
"VPSIGNBrr128")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNDYrr256")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNDrr128")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNWYrr256")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNWrr128")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLDYri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLDri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLQYri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLQri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLVDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLVDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLVQYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLVQrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLWYri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLWri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRADYri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRADri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRAVDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRAVDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRAWYri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRAWri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLDYri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLDri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLQYri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLQri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLVDYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLVDrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLVQYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLVQrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLWYri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLWri")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBSBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex 
"VPSUBSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBUSBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBUSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBUSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBUSWrr")>; - -def SKLWriteResGroup4 : SchedWriteRes<[SKLPort5]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup4], (instregex "COMP_FST0r")>; -def: InstRW<[SKLWriteResGroup4], (instregex "COM_FST0r")>; -def: InstRW<[SKLWriteResGroup4], (instregex "FINCSTP")>; -def: InstRW<[SKLWriteResGroup4], (instregex "FNOP")>; -def: InstRW<[SKLWriteResGroup4], (instregex "INSERTPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_MOVD64rr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_MOVD64to64rr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_MOVQ64rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PABSBrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PABSDrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PABSWrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PADDBirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PADDDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PADDQirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PADDWirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PALIGNR64irr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PANDNirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PANDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PORirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSHUFBrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSHUFWri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSIGNBrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSIGNDrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSIGNWrr64")>; -def: InstRW<[SKLWriteResGroup4], 
(instregex "MMX_PSUBBirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSUBDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSUBQirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSUBWirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKHBWirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKHDQirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKHWDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKLBWirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKLDQirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKLWDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PXORirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOV64toPQIrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVDDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVDI2PDIrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVHLPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVLHPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVSDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVSHDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVSLDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVUPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVUPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PACKSSDWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PACKSSWBrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PACKUSDWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PACKUSWBrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PALIGNRrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PBLENDWrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXBDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXBQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXWDrr")>; -def: 
InstRW<[SKLWriteResGroup4], (instregex "PMOVSXWQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXBDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXBQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXWQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PSHUFBrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PSHUFDri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PSHUFHWri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PSHUFLWri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PSLLDQri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PSRLDQri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKHBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKHDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKHQDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKHWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKLBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKLDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKLQDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKLWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "SHUFPDrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "SHUFPSrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "UCOM_FPr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "UCOM_Fr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "UNPCKHPDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "UNPCKHPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "UNPCKLPDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "UNPCKLPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VBROADCASTSSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VINSERTPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOV64toPQIrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex 
"VMOVDDUPYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVDDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVDI2PDIrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVHLPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVLHPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSHDUPYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSHDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSLDUPYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSLDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVUPDYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVUPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVUPSYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VMOVUPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPACKSSDWYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPACKSSDWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPACKSSWBYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPACKSSWBrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPACKUSDWYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPACKUSDWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPACKUSWBYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPACKUSWBrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPALIGNRYrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPALIGNRrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPBLENDWYrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPBLENDWrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPBROADCASTDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPBROADCASTQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPDYri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPDYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPDri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPDrr")>; -def: 
InstRW<[SKLWriteResGroup4], (instregex "VPERMILPSYri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPSYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPSri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXBDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXBQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXWQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXBDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXBQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXWQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFBYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFBrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFDYri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFDri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFHWYri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFHWri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFLWYri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFLWri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSLLDQYri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSLLDQri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSRLDQYri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPSRLDQri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHBWYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHDQYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHDQrr")>; -def: 
InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHQDQYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHQDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHWDYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLBWYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLDQYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLQDQYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLQDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLWDYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VSHUFPDYrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VSHUFPDrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VSHUFPSYrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VSHUFPSrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKHPDYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKHPDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKHPSYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKHPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKLPDYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKLPDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKLPSYrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKLPSrr")>; - -def SKLWriteResGroup5 : SchedWriteRes<[SKLPort6]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup5], (instregex "ADC(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADC(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADC8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADCX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADCX64rr")>; -def: InstRW<[SKLWriteResGroup5], 
(instregex "ADOX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADOX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BT(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BT(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTC(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTC(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTR(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTR(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTS(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTS(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CDQ")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CLAC")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVAE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVB(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVG(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVGE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVL(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVLE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVNE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVNO(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVNP(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVNS(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVO(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVP(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVS(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CQO")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JAE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JAE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JA_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JA_4")>; -def: InstRW<[SKLWriteResGroup5], 
(instregex "JBE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JBE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JB_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JB_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JGE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JGE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JG_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JG_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JLE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JLE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JL_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JL_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JMP(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JMP_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JMP_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNO_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNO_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNP_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNP_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNS_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNS_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JO_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JO_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JP_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JP_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JS_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JS_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "RORX32ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "RORX64ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SAR(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SAR(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex 
"SAR8r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SAR8ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SARX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SARX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SBB(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SBB(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SBB8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETAEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETBr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETGEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETGr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETLEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETLr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETNEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETNOr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETNPr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETNSr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETOr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETPr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETSr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHL(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHL(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHL8r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHL8ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHLX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHLX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHR(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHR(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHR8r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHR8ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHRX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHRX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "STAC")>; - -def SKLWriteResGroup6 : 
SchedWriteRes<[SKLPort15]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup6], (instregex "ANDN32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDN64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDNPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDNPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLENDPDrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLENDPSrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSI32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSI64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSMSK32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSMSK64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSR32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSR64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BZHI32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BZHI64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "LEA(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVAPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVAPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVDQArr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVDQUrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVPQI2QIrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVSSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ORPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ORPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PADDBrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PADDDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PADDQrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PADDWrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PANDNrr")>; -def: 
InstRW<[SKLWriteResGroup6], (instregex "PANDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PORrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PSUBBrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PSUBDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PSUBQrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PSUBWrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PXORrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDNPDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDNPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDNPSYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDNPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDPDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDPSYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VBLENDPDYrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VBLENDPDrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VBLENDPSYrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VBLENDPSrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVAPDYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVAPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVAPSYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVAPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVDQAYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVDQArr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVDQUYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVDQUrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVPQI2QIrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVSSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VORPDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VORPDrr")>; -def: 
InstRW<[SKLWriteResGroup6], (instregex "VORPSYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VORPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDBYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDBrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDQYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDQrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDWYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDWrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPANDNYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPANDNrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPANDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPANDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPBLENDDYrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPBLENDDrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPORYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPORrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBBYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBBrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBQYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBQrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBWYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBWrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPXORYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPXORrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VXORPDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VXORPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VXORPSYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VXORPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "XORPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex 
"XORPSrr")>; - -def SKLWriteResGroup7 : SchedWriteRes<[SKLPort0156]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup7], (instregex "ADD(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADD(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADD8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADD8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADD8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CBW")>; -//def: InstRW<[SKLWriteResGroup7], (instregex "CDQE")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CLC")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMC")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CWDE")>; -def: InstRW<[SKLWriteResGroup7], (instregex "DEC(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "DEC8r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "INC(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "INC8r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "LAHF")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOV(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOV8ri(_alt?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOV8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVSX(16|32|64)rr16")>; -def: InstRW<[SKLWriteResGroup7], (instregex 
"MOVSX(16|32|64)rr32")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVSX(16|32|64)rr8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVZX(16|32|64)rr16")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVZX(16|32|64)rr8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NEG(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NEG8r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NOOP")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NOT(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NOT8r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SAHF")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SGDT64m")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SIDT64m")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SLDT64m")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SMSW16m")>; -def: InstRW<[SKLWriteResGroup7], (instregex "STC")>; -def: InstRW<[SKLWriteResGroup7], (instregex "STRm")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SYSCALL")>; -def: InstRW<[SKLWriteResGroup7], (instregex "TEST(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "TEST8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "TEST8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "TEST8rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XCHG(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR(16|32|64)ri8")>; -def: 
InstRW<[SKLWriteResGroup7], (instregex "XOR(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR8rr(_REV?)")>; - -def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PADDSBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PADDSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PADDUSBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PADDUSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PAVGBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PAVGWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPEQBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPEQDirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPEQWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPGTBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPGTDirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPGTWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMAXSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMAXUBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMINSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMINUBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSLLDrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSLLQrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSLLWrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRADrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRAWrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRLDrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRLQrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRLWrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex 
"MMX_PSUBSBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSUBSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSUBUSBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSUBUSWirm")>; - -def SKLWriteResGroup13 : SchedWriteRes<[SKLPort0,SKLPort237]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MASKMOVQ64")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVDQU")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVPDYmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVPDmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVPSYmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVPSmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VPMASKMOVDYmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VPMASKMOVDmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VPMASKMOVQYmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VPMASKMOVQmr")>; - -def SKLWriteResGroup14 : SchedWriteRes<[SKLPort5,SKLPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup14], (instregex "FCOM32m")>; -def: InstRW<[SKLWriteResGroup14], (instregex "FCOM64m")>; -def: InstRW<[SKLWriteResGroup14], (instregex "FCOMP32m")>; -def: InstRW<[SKLWriteResGroup14], (instregex "FCOMP64m")>; -def: InstRW<[SKLWriteResGroup14], (instregex "INSERTPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PALIGNR64irm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PINSRWirmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PSHUFBrm64")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PSHUFWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKHBWirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKHDQirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKHWDirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKLBWirm")>; -def: 
InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKLDQirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKLWDirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MOVHPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MOVHPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MOVLPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MOVLPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PACKSSDWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PACKSSWBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PACKUSDWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PACKUSWBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PALIGNRrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PBLENDWrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PINSRBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PINSRDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PINSRQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PINSRWrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXBDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXBQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXWQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXBDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXBQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXWQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PSHUFBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PSHUFDmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PSHUFHWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PSHUFLWmi")>; -def: 
InstRW<[SKLWriteResGroup14], (instregex "PUNPCKHBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKHDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKHQDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKHWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKLBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKLDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKLQDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKLWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "SHUFPDrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "SHUFPSrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "UNPCKHPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "UNPCKHPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "UNPCKLPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "UNPCKLPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VINSERTPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VMOVHPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VMOVHPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VMOVLPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VMOVLPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKSSDWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKSSDWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKSSWBYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKSSWBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKUSDWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKUSDWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKUSWBYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKUSWBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPALIGNRYrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPALIGNRrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBLENDWYrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBLENDWrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex 
"VPBROADCASTBYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBROADCASTBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBROADCASTWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBROADCASTWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPDYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPDmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPSYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPSYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPSmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPINSRBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPINSRDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPINSRQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPINSRWrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXBDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXBQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXWQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXBDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXBQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXWQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFBYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFDYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFDmi")>; -def: 
InstRW<[SKLWriteResGroup14], (instregex "VPSHUFHWYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFHWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFLWYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFLWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHBWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHDQYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHQDQYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHQDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHWDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLBWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLDQYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLQDQYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLQDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLWDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VSHUFPDYrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VSHUFPDrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VSHUFPSYrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VSHUFPSrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKHPDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKHPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKHPSYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKHPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKLPDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKLPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKLPSYrm")>; -def: 
InstRW<[SKLWriteResGroup14], (instregex "VUNPCKLPSrm")>; - -def SKLWriteResGroup15 : SchedWriteRes<[SKLPort6,SKLPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup15], (instregex "FARJMP64")>; -def: InstRW<[SKLWriteResGroup15], (instregex "JMP(16|32|64)m")>; - -def SKLWriteResGroup16 : SchedWriteRes<[SKLPort01,SKLPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup16], (instregex "PABSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PABSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PABSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PADDSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PADDSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PADDUSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PADDUSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PAVGBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PAVGWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPEQBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPEQDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPEQQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPEQWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPGTBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPGTDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPGTWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXUBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXUDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXUWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINSWrm")>; -def: 
InstRW<[SKLWriteResGroup16], (instregex "PMINUBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINUDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINUWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSIGNBrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSIGNDrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSIGNWrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSLLDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSLLQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSLLWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRADrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRAWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRLDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRLQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRLWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSUBSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSUBSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSUBUSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSUBUSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDUSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDUSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDUSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDUSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPAVGBYrm")>; 
-def: InstRW<[SKLWriteResGroup16], (instregex "VPAVGBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPAVGWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPAVGWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQQYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSDYrm")>; -def: 
InstRW<[SKLWriteResGroup16], (instregex "VPMINSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNBYrm256")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNBrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNDYrm256")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNDrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNWYrm256")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNWrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLQYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLVDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLVDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLVQYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLVQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRADYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRADrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRAVDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRAVDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRAWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRAWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLDYrm")>; -def: 
InstRW<[SKLWriteResGroup16], (instregex "VPSRLDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLQYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLVDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLVDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLVQYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLVQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBUSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBUSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBUSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBUSWrm")>; - -def SKLWriteResGroup17 : SchedWriteRes<[SKLPort23,SKLPort05]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PABSBrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PABSDrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PABSWrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PADDBirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PADDDirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PADDQirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PADDWirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PANDNirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PANDirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PORirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSIGNBrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSIGNDrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex 
"MMX_PSIGNWrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSUBBirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSUBDirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSUBQirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSUBWirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PXORirm")>; - -def SKLWriteResGroup18 : SchedWriteRes<[SKLPort23,SKLPort06]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup18], (instregex "ADC(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADC8rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADCX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADCX64rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADOX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADOX64rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "BT(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVAE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVB(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVG(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVGE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVL(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVLE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVNE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVNO(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVNP(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVNS(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVO(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVP(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVS(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "RORX32mi")>; -def: InstRW<[SKLWriteResGroup18], (instregex 
"RORX64mi")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SARX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SARX64rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SBB(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SBB8rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SHLX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SHLX64rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SHRX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SHRX64rm")>; - -def SKLWriteResGroup19 : SchedWriteRes<[SKLPort23,SKLPort15]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup19], (instregex "ANDN32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "ANDN64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSI32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSI64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSMSK32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSMSK64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSR32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSR64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BZHI32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BZHI64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "MOVBE(16|32|64)rm")>; - -def SKLWriteResGroup20 : SchedWriteRes<[SKLPort23,SKLPort015]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup20], (instregex "ANDNPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ANDNPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ANDPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ANDPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "BLENDPDrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "BLENDPSrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ORPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ORPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PADDBrm")>; -def: 
InstRW<[SKLWriteResGroup20], (instregex "PADDDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PADDQrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PADDWrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PANDNrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PANDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PORrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PSUBBrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PSUBDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PSUBQrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PSUBWrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PXORrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDNPDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDNPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDNPSYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDNPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDPDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDPSYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VBLENDPDYrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VBLENDPDrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VBLENDPSYrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VBLENDPSrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VINSERTF128rm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VINSERTI128rm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VMASKMOVPDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VMASKMOVPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VMASKMOVPSYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VMASKMOVPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VORPDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VORPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VORPSYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex 
"VORPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPADDBYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPADDBrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPADDDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPADDDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPADDQYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPADDQrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPADDWYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPADDWrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPANDNYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPANDNrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPANDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPANDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPBLENDDYrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPBLENDDrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPMASKMOVDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPMASKMOVDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPMASKMOVQYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPMASKMOVQrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPORYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPORrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBBYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBBrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBQYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBQrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBWYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBWrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPXORYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VPXORrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VXORPDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VXORPDrm")>; -def: InstRW<[SKLWriteResGroup20], 
(instregex "VXORPSYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VXORPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "XORPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "XORPSrm")>; - -def SKLWriteResGroup21 : SchedWriteRes<[SKLPort23,SKLPort0156]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup21], (instregex "ADD(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "ADD8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "AND(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "AND8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP8mi")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP8mr")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "OR(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "OR8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "POP(16|32|64)r(mr?)")>; -def: InstRW<[SKLWriteResGroup21], (instregex "SUB(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "SUB8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "TEST(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup21], (instregex "TEST8mi")>; -def: InstRW<[SKLWriteResGroup21], (instregex "TEST8mr")>; -def: InstRW<[SKLWriteResGroup21], (instregex "XOR(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "XOR8rm")>; - -def SKLWriteResGroup22 : SchedWriteRes<[SKLPort237,SKLPort0156]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup22], (instregex "SFENCE")>; - -def SKLWriteResGroup23 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { - let Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup23], 
(instregex "EXTRACTPSmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "PEXTRBmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "PEXTRDmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "PEXTRQmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "PEXTRWmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "STMXCSR")>; -def: InstRW<[SKLWriteResGroup23], (instregex "VEXTRACTPSmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "VPEXTRBmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "VPEXTRDmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "VPEXTRQmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "VPEXTRWmr")>; -def: InstRW<[SKLWriteResGroup23], (instregex "VSTMXCSR")>; - -def SKLWriteResGroup24 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> { - let Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup24], (instregex "FNSTCW16m")>; - -def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { - let Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup25], (instregex "SETAEm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETBm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETEm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETGEm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETGm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETLEm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETLm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETNEm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETNOm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETNPm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETNSm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETOm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETPm")>; -def: InstRW<[SKLWriteResGroup25], (instregex "SETSm")>; - -def SKLWriteResGroup26 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort15]> { - let Latency = 1; - let NumMicroOps = 3; - let 
ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup26], (instregex "MOVBE(16|32|64)mr")>; - -def SKLWriteResGroup27 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { - let Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup27], (instregex "PUSH(16|32|64)r(mr?)")>; -def: InstRW<[SKLWriteResGroup27], (instregex "PUSH64i8")>; -def: InstRW<[SKLWriteResGroup27], (instregex "STOSB")>; -def: InstRW<[SKLWriteResGroup27], (instregex "STOSL")>; -def: InstRW<[SKLWriteResGroup27], (instregex "STOSQ")>; -def: InstRW<[SKLWriteResGroup27], (instregex "STOSW")>; - -def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { - let Latency = 1; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup28], (instregex "BTC(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup28], (instregex "BTR(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup28], (instregex "BTS(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SAR(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SAR(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SAR8m1")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SAR8mi")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SHL(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SHL(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SHL8m1")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SHL8mi")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SHR(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SHR(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SHR8m1")>; -def: InstRW<[SKLWriteResGroup28], (instregex "SHR8mi")>; - -def SKLWriteResGroup29 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 1; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup29], (instregex "ADD(16|32|64)mi8")>; -def: 
InstRW<[SKLWriteResGroup29], (instregex "ADD(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "ADD8mi")>; -def: InstRW<[SKLWriteResGroup29], (instregex "ADD8mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "AND(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup29], (instregex "AND(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "AND8mi")>; -def: InstRW<[SKLWriteResGroup29], (instregex "AND8mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "DEC(16|32|64)m")>; -def: InstRW<[SKLWriteResGroup29], (instregex "DEC8m")>; -def: InstRW<[SKLWriteResGroup29], (instregex "INC(16|32|64)m")>; -def: InstRW<[SKLWriteResGroup29], (instregex "INC8m")>; -def: InstRW<[SKLWriteResGroup29], (instregex "NEG(16|32|64)m")>; -def: InstRW<[SKLWriteResGroup29], (instregex "NEG8m")>; -def: InstRW<[SKLWriteResGroup29], (instregex "NOT(16|32|64)m")>; -def: InstRW<[SKLWriteResGroup29], (instregex "NOT8m")>; -def: InstRW<[SKLWriteResGroup29], (instregex "OR(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup29], (instregex "OR(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "OR8mi")>; -def: InstRW<[SKLWriteResGroup29], (instregex "OR8mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "POP(16|32|64)rmm")>; -def: InstRW<[SKLWriteResGroup29], (instregex "PUSH(16|32|64)rmm")>; -def: InstRW<[SKLWriteResGroup29], (instregex "SUB(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup29], (instregex "SUB(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "SUB8mi")>; -def: InstRW<[SKLWriteResGroup29], (instregex "SUB8mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "XOR(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup29], (instregex "XOR(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "XOR8mi")>; -def: InstRW<[SKLWriteResGroup29], (instregex "XOR8mr")>; - -def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0]> { - let Latency = 2; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup31], (instregex "COMISDrr")>; 
-def: InstRW<[SKLWriteResGroup31], (instregex "COMISSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MMX_MOVD64grr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MMX_PMOVMSKBrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MOVMSKPDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MOVMSKPSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MOVPDI2DIrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MOVPQIto64rr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "PMOVMSKBrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "UCOMISDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "UCOMISSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VCOMISDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VCOMISSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVMSKPDYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVMSKPDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVMSKPSYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVMSKPSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVPDI2DIrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVPQIto64rr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VPMOVMSKBYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VPMOVMSKBrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VTESTPDYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VTESTPDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VTESTPSYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VTESTPSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VUCOMISDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VUCOMISSrr")>; - -def SKLWriteResGroup32 : SchedWriteRes<[SKLPort5]> { +def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup32], (instregex "MMX_MOVQ2DQrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex 
"MMX_PINSRWirri")>; -def: InstRW<[SKLWriteResGroup32], (instregex "PINSRBrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "PINSRDrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "PINSRQrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "PINSRWrri")>; -def: InstRW<[SKLWriteResGroup32], (instregex "VPINSRBrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "VPINSRDrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "VPINSRQrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "VPINSRWrri")>; +def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "MMX_PINSRWirri")>; +def: InstRW<[SKLWriteResGroup13], (instregex "PINSRBrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "PINSRDrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "PINSRQrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "PINSRWrri")>; +def: InstRW<[SKLWriteResGroup13], (instregex "VPINSRBrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "VPINSRDrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "VPINSRQrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "VPINSRWrri")>; -def SKLWriteResGroup33 : SchedWriteRes<[SKLPort05]> { +def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup33], (instregex "FDECSTP")>; -def: InstRW<[SKLWriteResGroup33], (instregex "MMX_MOVDQ2Qrr")>; +def: InstRW<[SKLWriteResGroup14], (instregex "FDECSTP")>; +def: InstRW<[SKLWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>; -def SKLWriteResGroup34 : SchedWriteRes<[SKLPort06]> { +def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup34], (instregex "CMOVA(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup34], (instregex "CMOVBE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROL(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROL(16|32|64)ri")>; 
-def: InstRW<[SKLWriteResGroup34], (instregex "ROL8r1")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROL8ri")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROR(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROR(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROR8r1")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROR8ri")>; -def: InstRW<[SKLWriteResGroup34], (instregex "SETAr")>; -def: InstRW<[SKLWriteResGroup34], (instregex "SETBEr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "CMOVA(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROL(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROL(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROL8r1")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROL8ri")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROR(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROR8r1")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROR8ri")>; +def: InstRW<[SKLWriteResGroup15], (instregex "SETAr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "SETBEr")>; -def SKLWriteResGroup35 : SchedWriteRes<[SKLPort015]> { +def SKLWriteResGroup16 : SchedWriteRes<[SKLPort015]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup35], (instregex "BLENDVPDrr0")>; -def: InstRW<[SKLWriteResGroup35], (instregex "BLENDVPSrr0")>; -def: InstRW<[SKLWriteResGroup35], (instregex "PBLENDVBrr0")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VBLENDVPDYrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VBLENDVPDrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VBLENDVPSYrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VBLENDVPSrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VPBLENDVBYrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VPBLENDVBrr")>; +def: 
InstRW<[SKLWriteResGroup16], (instregex "BLENDVPDrr0")>; +def: InstRW<[SKLWriteResGroup16], (instregex "BLENDVPSrr0")>; +def: InstRW<[SKLWriteResGroup16], (instregex "PBLENDVBrr0")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VBLENDVPDYrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VBLENDVPDrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VBLENDVPSYrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VBLENDVPSrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VPBLENDVBYrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VPBLENDVBrr")>; -def SKLWriteResGroup36 : SchedWriteRes<[SKLPort0156]> { +def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup36], (instregex "LFENCE")>; -def: InstRW<[SKLWriteResGroup36], (instregex "WAIT")>; -def: InstRW<[SKLWriteResGroup36], (instregex "XGETBV")>; +def: InstRW<[SKLWriteResGroup17], (instregex "LFENCE")>; +def: InstRW<[SKLWriteResGroup17], (instregex "WAIT")>; +def: InstRW<[SKLWriteResGroup17], (instregex "XGETBV")>; -def SKLWriteResGroup37 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup18 : SchedWriteRes<[SKLPort0,SKLPort237]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup37], (instregex "COMISDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "COMISSrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "UCOMISDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "UCOMISSrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VCOMISDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VCOMISSrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VTESTPDYrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VTESTPDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VTESTPSYrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VTESTPSrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VUCOMISDrm")>; -def: InstRW<[SKLWriteResGroup37], 
(instregex "VUCOMISSrm")>; +def: InstRW<[SKLWriteResGroup18], (instregex "MMX_MASKMOVQ64")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVDQU")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPDYmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPDmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPSYmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPSmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VPMASKMOVDYmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VPMASKMOVDmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VPMASKMOVQYmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VPMASKMOVQmr")>; -def SKLWriteResGroup38 : SchedWriteRes<[SKLPort5,SKLPort01]> { +def SKLWriteResGroup19 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup38], (instregex "PSLLDrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSLLQrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSLLWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRADrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRAWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRLDrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRLQrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRLWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSLLDrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSLLQrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSLLWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRADrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRAWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRLDrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRLQrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRLWrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "PSLLDrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "PSLLQrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "PSLLWrr")>; 
+def: InstRW<[SKLWriteResGroup19], (instregex "PSRADrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "PSRAWrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "PSRLDrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "PSRLQrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "PSRLWrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "VPSLLDrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "VPSLLQrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "VPSLLWrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "VPSRADrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "VPSRAWrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "VPSRLDrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "VPSRLQrr")>; +def: InstRW<[SKLWriteResGroup19], (instregex "VPSRLWrr")>; -def SKLWriteResGroup39 : SchedWriteRes<[SKLPort6,SKLPort0156]> { +def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup39], (instregex "CLFLUSH")>; +def: InstRW<[SKLWriteResGroup20], (instregex "CLFLUSH")>; -def SKLWriteResGroup40 : SchedWriteRes<[SKLPort06,SKLPort15]> { +def SKLWriteResGroup21 : SchedWriteRes<[SKLPort237,SKLPort0156]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup40], (instregex "BEXTR32rr")>; -def: InstRW<[SKLWriteResGroup40], (instregex "BEXTR64rr")>; -def: InstRW<[SKLWriteResGroup40], (instregex "BSWAP(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup21], (instregex "SFENCE")>; -def SKLWriteResGroup41 : SchedWriteRes<[SKLPort06,SKLPort0156]> { +def SKLWriteResGroup22 : SchedWriteRes<[SKLPort06,SKLPort15]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup41], (instregex "ADC8i8")>; -def: InstRW<[SKLWriteResGroup41], (instregex "ADC8ri")>; -def: InstRW<[SKLWriteResGroup41], (instregex "CWD")>; -def: InstRW<[SKLWriteResGroup41], (instregex "JRCXZ")>; -def: 
InstRW<[SKLWriteResGroup41], (instregex "SBB8i8")>; -def: InstRW<[SKLWriteResGroup41], (instregex "SBB8ri")>; - -def SKLWriteResGroup42 : SchedWriteRes<[SKLPort5,SKLPort23]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SKLWriteResGroup42], (instregex "MMX_PACKSSDWirm")>; -def: InstRW<[SKLWriteResGroup42], (instregex "MMX_PACKSSWBirm")>; -def: InstRW<[SKLWriteResGroup42], (instregex "MMX_PACKUSWBirm")>; - -def SKLWriteResGroup43 : SchedWriteRes<[SKLPort23,SKLPort06]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKLWriteResGroup43], (instregex "CMOVA(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup43], (instregex "CMOVBE(16|32|64)rm")>; - -def SKLWriteResGroup44 : SchedWriteRes<[SKLPort23,SKLPort015]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKLWriteResGroup44], (instregex "BLENDVPDrm0")>; -def: InstRW<[SKLWriteResGroup44], (instregex "BLENDVPSrm0")>; -def: InstRW<[SKLWriteResGroup44], (instregex "PBLENDVBrm0")>; -def: InstRW<[SKLWriteResGroup44], (instregex "VBLENDVPDYrm")>; -def: InstRW<[SKLWriteResGroup44], (instregex "VBLENDVPDrm")>; -def: InstRW<[SKLWriteResGroup44], (instregex "VBLENDVPSYrm")>; -def: InstRW<[SKLWriteResGroup44], (instregex "VBLENDVPSrm")>; -def: InstRW<[SKLWriteResGroup44], (instregex "VPBLENDVBYrm")>; -def: InstRW<[SKLWriteResGroup44], (instregex "VPBLENDVBrm")>; - -def SKLWriteResGroup45 : SchedWriteRes<[SKLPort23,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKLWriteResGroup45], (instregex "LEAVE64")>; -def: InstRW<[SKLWriteResGroup45], (instregex "SCASB")>; -def: InstRW<[SKLWriteResGroup45], (instregex "SCASL")>; -def: InstRW<[SKLWriteResGroup45], (instregex "SCASQ")>; -def: InstRW<[SKLWriteResGroup45], (instregex "SCASW")>; +def: InstRW<[SKLWriteResGroup22], (instregex "BEXTR32rr")>; +def: InstRW<[SKLWriteResGroup22], (instregex "BEXTR64rr")>; 
+def: InstRW<[SKLWriteResGroup22], (instregex "BSWAP(16|32|64)r")>; -def SKLWriteResGroup46 : SchedWriteRes<[SKLPort237,SKLPort0156]> { +def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup46], (instregex "MFENCE")>; +def: InstRW<[SKLWriteResGroup23], (instregex "ADC8i8")>; +def: InstRW<[SKLWriteResGroup23], (instregex "ADC8ri")>; +def: InstRW<[SKLWriteResGroup23], (instregex "CWD")>; +def: InstRW<[SKLWriteResGroup23], (instregex "JRCXZ")>; +def: InstRW<[SKLWriteResGroup23], (instregex "SBB8i8")>; +def: InstRW<[SKLWriteResGroup23], (instregex "SBB8ri")>; -def SKLWriteResGroup47 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> { +def SKLWriteResGroup24 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup47], (instregex "FNSTSWm")>; +def: InstRW<[SKLWriteResGroup24], (instregex "EXTRACTPSmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "PEXTRBmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "PEXTRDmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "PEXTRQmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "PEXTRWmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "STMXCSR")>; +def: InstRW<[SKLWriteResGroup24], (instregex "VEXTRACTPSmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "VPEXTRBmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "VPEXTRDmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "VPEXTRQmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "VPEXTRWmr")>; +def: InstRW<[SKLWriteResGroup24], (instregex "VSTMXCSR")>; -def SKLWriteResGroup48 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05]> { +def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup48], 
(instregex "FLDCW16m")>; +def: InstRW<[SKLWriteResGroup25], (instregex "FNSTCW16m")>; -def SKLWriteResGroup49 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort0156]> { +def SKLWriteResGroup26 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup49], (instregex "LDMXCSR")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VLDMXCSR")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETAEm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETBm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETEm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETGEm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETGm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETLEm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETLm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETNEm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETNOm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETNPm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETNSm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETOm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETPm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETSm")>; -def SKLWriteResGroup51 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> { +def SKLWriteResGroup27 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort15]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup51], (instregex "LRETQ")>; -def: InstRW<[SKLWriteResGroup51], (instregex "RETQ")>; +def: InstRW<[SKLWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>; -def SKLWriteResGroup52 : SchedWriteRes<[SKLPort23,SKLPort06,SKLPort15]> { +def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup52], (instregex "BEXTR32rm")>; -def: InstRW<[SKLWriteResGroup52], (instregex "BEXTR64rm")>; - -def 
SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { - let Latency = 2; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKLWriteResGroup53], (instregex "SETAm")>; -def: InstRW<[SKLWriteResGroup53], (instregex "SETBEm")>; - -def SKLWriteResGroup54 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup54], (instregex "CALL(16|32|64)r")>; - -def SKLWriteResGroup55 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup55], (instregex "CALL64pcrel32")>; - -def SKLWriteResGroup56 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[SKLWriteResGroup56], (instregex "ROL(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROL(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROL8m1")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROL8mi")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROR(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROR(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROR8m1")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROR8mi")>; - -def SKLWriteResGroup57 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[SKLWriteResGroup57], (instregex "XADD(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup57], (instregex "XADD8rm")>; - -def SKLWriteResGroup58 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup58], (instregex "CALL(16|32|64)m")>; -def: InstRW<[SKLWriteResGroup58], (instregex "FARCALL64")>; 
+def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>; +def: InstRW<[SKLWriteResGroup28], (instregex "PUSH64i8")>; +def: InstRW<[SKLWriteResGroup28], (instregex "STOSB")>; +def: InstRW<[SKLWriteResGroup28], (instregex "STOSL")>; +def: InstRW<[SKLWriteResGroup28], (instregex "STOSQ")>; +def: InstRW<[SKLWriteResGroup28], (instregex "STOSW")>; -def SKLWriteResGroup60 : SchedWriteRes<[SKLPort1]> { +def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> { let Latency = 3; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup60], (instregex "BSF(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "BSR(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "IMUL64rr(i8?)")>; -def: InstRW<[SKLWriteResGroup60], (instregex "IMUL8r")>; -def: InstRW<[SKLWriteResGroup60], (instregex "LZCNT(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "MUL8r")>; -def: InstRW<[SKLWriteResGroup60], (instregex "PDEP32rr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "PDEP64rr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "PEXT32rr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "PEXT64rr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "POPCNT(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "SHLD(16|32|64)rri8")>; -def: InstRW<[SKLWriteResGroup60], (instregex "SHRD(16|32|64)rri8")>; -def: InstRW<[SKLWriteResGroup60], (instregex "TZCNT(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "BSF(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "BSR(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "IMUL64rr(i8?)")>; +def: InstRW<[SKLWriteResGroup29], (instregex "IMUL8r")>; +def: InstRW<[SKLWriteResGroup29], (instregex "LZCNT(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "MUL8r")>; +def: InstRW<[SKLWriteResGroup29], (instregex "PDEP32rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex 
"PDEP64rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "PEXT32rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "PEXT64rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "POPCNT(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "SHLD(16|32|64)rri8")>; +def: InstRW<[SKLWriteResGroup29], (instregex "SHRD(16|32|64)rri8")>; +def: InstRW<[SKLWriteResGroup29], (instregex "TZCNT(16|32|64)rr")>; -def SKLWriteResGroup60_16 : SchedWriteRes<[SKLPort1, SKLPort0156]> { +def SKLWriteResGroup29_16 : SchedWriteRes<[SKLPort1, SKLPort0156]> { let Latency = 3; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup60_16], (instregex "IMUL16rr(i8?)")>; +def: InstRW<[SKLWriteResGroup29_16], (instregex "IMUL16rr(i8?)")>; -def SKLWriteResGroup60_32 : SchedWriteRes<[SKLPort1]> { +def SKLWriteResGroup29_32 : SchedWriteRes<[SKLPort1]> { let Latency = 3; let NumMicroOps = 1; } -def: InstRW<[SKLWriteResGroup60_32], (instregex "IMUL32rr(i8?)")>; +def: InstRW<[SKLWriteResGroup29_32], (instregex "IMUL32rr(i8?)")>; -def SKLWriteResGroup61 : SchedWriteRes<[SKLPort5]> { +def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> { let Latency = 3; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup61], (instregex "ADD_FPrST0")>; -def: InstRW<[SKLWriteResGroup61], (instregex "ADD_FST0r")>; -def: InstRW<[SKLWriteResGroup61], (instregex "ADD_FrST0")>; -def: InstRW<[SKLWriteResGroup61], (instregex "MMX_PSADBWirr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "PCMPGTQrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "PSADBWrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "SUBR_FPrST0")>; -def: InstRW<[SKLWriteResGroup61], (instregex "SUBR_FST0r")>; -def: InstRW<[SKLWriteResGroup61], (instregex "SUBR_FrST0")>; -def: InstRW<[SKLWriteResGroup61], (instregex "SUB_FPrST0")>; -def: InstRW<[SKLWriteResGroup61], (instregex "SUB_FST0r")>; -def: InstRW<[SKLWriteResGroup61], (instregex "SUB_FrST0")>; -def: InstRW<[SKLWriteResGroup61], 
(instregex "VBROADCASTSDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VBROADCASTSSYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VEXTRACTF128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VEXTRACTI128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VINSERTF128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VINSERTI128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTBYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTBrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTWYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTWrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPCMPGTQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPCMPGTQrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERM2F128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERM2I128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERMDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERMPDYri")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERMPSYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERMQYri")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXBDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXBQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXBWYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXDQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXWDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXWQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXBDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXBQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXBWYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXDQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXWDYrr")>; -def: InstRW<[SKLWriteResGroup61], 
(instregex "VPMOVZXWQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPSADBWYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPSADBWrr")>; - -def SKLWriteResGroup62 : SchedWriteRes<[SKLPort0,SKLPort5]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup62], (instregex "EXTRACTPSrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "MMX_PEXTRWirri")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRBrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRDrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRQrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRWri")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRWrr_REV")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PTESTrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VEXTRACTPSrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRBrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRDrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRQrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRWri")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRWrr_REV")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPTESTYrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPTESTrr")>; - -def SKLWriteResGroup63 : SchedWriteRes<[SKLPort0,SKLPort0156]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup63], (instregex "FNSTSW16r")>; +def: InstRW<[SKLWriteResGroup30], (instregex "ADD_FPrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "ADD_FST0r")>; +def: InstRW<[SKLWriteResGroup30], (instregex "ADD_FrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "MMX_PSADBWirr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "PCMPGTQrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "PSADBWrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUBR_FPrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUBR_FST0r")>; +def: 
InstRW<[SKLWriteResGroup30], (instregex "SUBR_FrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUB_FPrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUB_FST0r")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUB_FrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VBROADCASTSDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VBROADCASTSSYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VEXTRACTF128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VEXTRACTI128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VINSERTF128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VINSERTI128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTBYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTBrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTWYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTWrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPCMPGTQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPCMPGTQrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERM2F128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERM2I128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERMDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERMPDYri")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERMPSYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERMQYri")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXBDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXBQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXBWYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXDQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXWDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXWQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXBDYrr")>; +def: 
InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXBQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXBWYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXDQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXWDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXWQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPSADBWYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPSADBWrr")>; -def SKLWriteResGroup64 : SchedWriteRes<[SKLPort1,SKLPort23]> { +def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 3; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup64], (instregex "BSF(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup64], (instregex "BSR(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup64], (instregex "IMUL64m")>; -def: InstRW<[SKLWriteResGroup64], (instregex "IMUL(32|64)rm(i8?)")>; -def: InstRW<[SKLWriteResGroup64], (instregex "IMUL8m")>; -def: InstRW<[SKLWriteResGroup64], (instregex "LZCNT(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup64], (instregex "MUL64m")>; -def: InstRW<[SKLWriteResGroup64], (instregex "MUL8m")>; -def: InstRW<[SKLWriteResGroup64], (instregex "PDEP32rm")>; -def: InstRW<[SKLWriteResGroup64], (instregex "PDEP64rm")>; -def: InstRW<[SKLWriteResGroup64], (instregex "PEXT32rm")>; -def: InstRW<[SKLWriteResGroup64], (instregex "PEXT64rm")>; -def: InstRW<[SKLWriteResGroup64], (instregex "POPCNT(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup64], (instregex "TZCNT(16|32|64)rm")>; - -def SKLWriteResGroup64_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup64_16], (instregex "IMUL16rm(i8?)")>; - -def SKLWriteResGroup64_16_2 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { - let Latency = 3; - let NumMicroOps = 5; -} -def: InstRW<[SKLWriteResGroup64_16_2], (instregex "IMUL16m")>; -def: InstRW<[SKLWriteResGroup64_16_2], (instregex 
"MUL16m")>; +def: InstRW<[SKLWriteResGroup31], (instregex "EXTRACTPSrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "MMX_PEXTRWirri")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRBrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRDrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRQrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRWri")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRWrr_REV")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PTESTrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VEXTRACTPSrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRBrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRDrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRQrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRWri")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRWrr_REV")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VPTESTYrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VPTESTrr")>; -def SKLWriteResGroup64_32 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup64_32], (instregex "IMUL32m")>; -def: InstRW<[SKLWriteResGroup64_32], (instregex "MUL32m")>; - -def SKLWriteResGroup65 : SchedWriteRes<[SKLPort5,SKLPort23]> { +def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> { let Latency = 3; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup65], (instregex "ADD_F32m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "ADD_F64m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "ILD_F16m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "ILD_F32m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "ILD_F64m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "MMX_PSADBWirm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "PCMPGTQrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "PSADBWrm")>; -def: InstRW<[SKLWriteResGroup65], 
(instregex "SUBR_F32m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "SUBR_F64m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "SUB_F32m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "SUB_F64m")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPCMPGTQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPCMPGTQrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPERM2F128rm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPERM2I128rm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPERMDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPERMPDYmi")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPERMPSYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPERMQYmi")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXBDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXBQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXBWYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXDQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXWDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXWQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXBDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXBQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXBWYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXDQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXWDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXWQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPSADBWYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPSADBWrm")>; +def: InstRW<[SKLWriteResGroup32], (instregex "FNSTSW16r")>; -def SKLWriteResGroup66 : SchedWriteRes<[SKLPort06]> { +def SKLWriteResGroup33 : SchedWriteRes<[SKLPort06]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [3]; } -def: InstRW<[SKLWriteResGroup66], (instregex "ROL(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "ROL8rCL")>; -def: InstRW<[SKLWriteResGroup66], 
(instregex "ROR(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "ROR8rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SAR(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SAR8rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SHL(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SHL8rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SHR(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SHR8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "ROL(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "ROL8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "ROR(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "ROR8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SAR(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SAR8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SHL(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SHL8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SHR(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SHR8rCL")>; -def SKLWriteResGroup67 : SchedWriteRes<[SKLPort0156]> { +def SKLWriteResGroup34 : SchedWriteRes<[SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [3]; } -def: InstRW<[SKLWriteResGroup67], (instregex "XADD(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup67], (instregex "XADD8rr")>; -def: InstRW<[SKLWriteResGroup67], (instregex "XCHG8rr")>; +def: InstRW<[SKLWriteResGroup34], (instregex "XADD(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup34], (instregex "XADD8rr")>; +def: InstRW<[SKLWriteResGroup34], (instregex "XCHG8rr")>; -def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0,SKLPort5]> { +def SKLWriteResGroup35 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup68], (instregex "MMX_PHADDSWrr64")>; -def: InstRW<[SKLWriteResGroup68], (instregex "MMX_PHSUBSWrr64")>; +def: 
InstRW<[SKLWriteResGroup35], (instregex "MMX_PHADDSWrr64")>; +def: InstRW<[SKLWriteResGroup35], (instregex "MMX_PHSUBSWrr64")>; -def SKLWriteResGroup69 : SchedWriteRes<[SKLPort5,SKLPort01]> { +def SKLWriteResGroup36 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup69], (instregex "PHADDSWrr128")>; -def: InstRW<[SKLWriteResGroup69], (instregex "PHSUBSWrr128")>; -def: InstRW<[SKLWriteResGroup69], (instregex "VPHADDSWrr128")>; -def: InstRW<[SKLWriteResGroup69], (instregex "VPHADDSWrr256")>; -def: InstRW<[SKLWriteResGroup69], (instregex "VPHSUBSWrr128")>; -def: InstRW<[SKLWriteResGroup69], (instregex "VPHSUBSWrr256")>; +def: InstRW<[SKLWriteResGroup36], (instregex "PHADDSWrr128")>; +def: InstRW<[SKLWriteResGroup36], (instregex "PHSUBSWrr128")>; +def: InstRW<[SKLWriteResGroup36], (instregex "VPHADDSWrr128")>; +def: InstRW<[SKLWriteResGroup36], (instregex "VPHADDSWrr256")>; +def: InstRW<[SKLWriteResGroup36], (instregex "VPHSUBSWrr128")>; +def: InstRW<[SKLWriteResGroup36], (instregex "VPHSUBSWrr256")>; -def SKLWriteResGroup70 : SchedWriteRes<[SKLPort5,SKLPort05]> { +def SKLWriteResGroup37 : SchedWriteRes<[SKLPort5,SKLPort05]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup70], (instregex "MMX_PHADDWrr64")>; -def: InstRW<[SKLWriteResGroup70], (instregex "MMX_PHADDrr64")>; -def: InstRW<[SKLWriteResGroup70], (instregex "MMX_PHSUBDrr64")>; -def: InstRW<[SKLWriteResGroup70], (instregex "MMX_PHSUBWrr64")>; +def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PHADDWrr64")>; +def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PHADDrr64")>; +def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PHSUBDrr64")>; +def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PHSUBWrr64")>; -def SKLWriteResGroup71 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup38 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 3; let NumMicroOps = 3; let 
ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup71], (instregex "PHADDDrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "PHADDWrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "PHSUBDrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "PHSUBWrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHADDDYrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHADDDrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHADDWYrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHADDWrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHSUBDYrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHSUBDrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHSUBWYrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHSUBWrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "PHADDDrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "PHADDWrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "PHSUBDrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "PHSUBWrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHADDDYrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHADDDrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHADDWYrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHADDWrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHSUBDYrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHSUBDrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHSUBWYrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHSUBWrr")>; -def SKLWriteResGroup72 : SchedWriteRes<[SKLPort5,SKLPort0156]> { +def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup72], (instregex "MMX_PACKSSDWirr")>; -def: InstRW<[SKLWriteResGroup72], (instregex "MMX_PACKSSWBirr")>; -def: InstRW<[SKLWriteResGroup72], (instregex "MMX_PACKUSWBirr")>; +def: InstRW<[SKLWriteResGroup39], (instregex "MMX_PACKSSDWirr")>; +def: InstRW<[SKLWriteResGroup39], 
(instregex "MMX_PACKSSWBirr")>; +def: InstRW<[SKLWriteResGroup39], (instregex "MMX_PACKUSWBirr")>; -def SKLWriteResGroup73 : SchedWriteRes<[SKLPort6,SKLPort0156]> { +def SKLWriteResGroup40 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup73], (instregex "CLD")>; +def: InstRW<[SKLWriteResGroup40], (instregex "CLD")>; -def SKLWriteResGroup74 : SchedWriteRes<[SKLPort06,SKLPort0156]> { +def SKLWriteResGroup41 : SchedWriteRes<[SKLPort237,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup74], (instregex "RCL(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCL(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCL8r1")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCL8ri")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCR(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCR(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCR8r1")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCR8ri")>; +def: InstRW<[SKLWriteResGroup41], (instregex "MFENCE")>; -def SKLWriteResGroup75 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { +def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup75], (instregex "PTESTrm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "VPTESTYrm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "VPTESTrm")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCL(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCL(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCL8r1")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCL8ri")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCR(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCR(16|32|64)ri")>; +def: 
InstRW<[SKLWriteResGroup42], (instregex "RCR8r1")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCR8ri")>; -def SKLWriteResGroup76 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { +def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup76], (instregex "ISTT_FP16m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "ISTT_FP32m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "ISTT_FP64m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_F16m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_F32m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_FP16m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_FP32m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_FP64m")>; - -def SKLWriteResGroup77 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { - let Latency = 3; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SKLWriteResGroup77], (instregex "MMX_PHADDSWrm64")>; -def: InstRW<[SKLWriteResGroup77], (instregex "MMX_PHSUBSWrm64")>; +def: InstRW<[SKLWriteResGroup43], (instregex "FNSTSWm")>; -def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { +def SKLWriteResGroup44 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { let Latency = 3; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [1,1,2]; } -def: InstRW<[SKLWriteResGroup78], (instregex "PHADDSWrm128")>; -def: InstRW<[SKLWriteResGroup78], (instregex "PHSUBSWrm128")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VPHADDSWrm128")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VPHADDSWrm256")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VPHSUBSWrm128")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VPHSUBSWrm256")>; +def: InstRW<[SKLWriteResGroup44], (instregex "SETAm")>; +def: InstRW<[SKLWriteResGroup44], (instregex "SETBEm")>; -def SKLWriteResGroup79 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort05]> { +def 
SKLWriteResGroup45 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> { let Latency = 3; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup79], (instregex "MMX_PHADDWrm64")>; -def: InstRW<[SKLWriteResGroup79], (instregex "MMX_PHADDrm64")>; -def: InstRW<[SKLWriteResGroup79], (instregex "MMX_PHSUBDrm64")>; -def: InstRW<[SKLWriteResGroup79], (instregex "MMX_PHSUBWrm64")>; +def: InstRW<[SKLWriteResGroup45], (instregex "CALL(16|32|64)r")>; -def SKLWriteResGroup80 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { +def SKLWriteResGroup46 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 3; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SKLWriteResGroup80], (instregex "PHADDDrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "PHADDWrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "PHSUBDrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "PHSUBWrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHADDDYrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHADDDrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHADDWYrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHADDWrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHSUBDYrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHSUBDrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHSUBWYrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHSUBWrm")>; - -def SKLWriteResGroup81 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> { - let Latency = 3; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup81], (instregex "ROR(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup81], (instregex "ROR8mCL")>; - -def SKLWriteResGroup82 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 3; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[SKLWriteResGroup82], (instregex 
"RCL(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCL(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCL8m1")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCL8mi")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCR(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCR(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCR8m1")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCR8mi")>; - -def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { - let Latency = 3; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,3]; -} -def: InstRW<[SKLWriteResGroup83], (instregex "ROL(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "ROL8mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SAR(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SAR8mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SHL(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SHL8mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SHR(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SHR8mCL")>; - -def SKLWriteResGroup84 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 3; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,3]; -} -def: InstRW<[SKLWriteResGroup84], (instregex "ADC(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup84], (instregex "ADC8mi")>; - -def SKLWriteResGroup85 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 3; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,2,1]; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup85], (instregex "ADC(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup85], (instregex "ADC8mr")>; -def: InstRW<[SKLWriteResGroup85], (instregex "CMPXCHG(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup85], (instregex "CMPXCHG8rm")>; -def: InstRW<[SKLWriteResGroup85], (instregex "SBB(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup85], 
(instregex "SBB(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup85], (instregex "SBB8mi")>; -def: InstRW<[SKLWriteResGroup85], (instregex "SBB8mr")>; +def: InstRW<[SKLWriteResGroup46], (instregex "CALL64pcrel32")>; -def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup47 : SchedWriteRes<[SKLPort0]> { let Latency = 4; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup86], (instregex "AESDECLASTrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "AESDECrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "AESENCLASTrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "AESENCrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMADDUBSWrr64")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMADDWDirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULHRSWrr64")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULHUWirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULHWirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULLWirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULUDQirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MUL_FPrST0")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MUL_FST0r")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MUL_FrST0")>; -def: InstRW<[SKLWriteResGroup86], (instregex "RCPPSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "RCPSSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "RSQRTPSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "RSQRTSSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VAESDECLASTrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VAESDECrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VAESENCLASTrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VAESENCrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRCPPSYr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRCPPSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRCPSSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex 
"VRSQRTPSYr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRSQRTPSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRSQRTSSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "AESDECLASTrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "AESDECrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "AESENCLASTrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "AESENCrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMADDUBSWrr64")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMADDWDirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULHRSWrr64")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULHUWirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULHWirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULLWirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULUDQirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MUL_FPrST0")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MUL_FST0r")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MUL_FrST0")>; +def: InstRW<[SKLWriteResGroup47], (instregex "RCPPSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "RCPSSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "RSQRTPSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "RSQRTSSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VAESDECLASTrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VAESDECrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VAESENCLASTrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VAESENCrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRCPPSYr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRCPPSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRCPSSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRSQRTPSYr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRSQRTPSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRSQRTSSr")>; -def SKLWriteResGroup87 : SchedWriteRes<[SKLPort01]> { +def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> { let 
Latency = 4; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup87], (instregex "ADDPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDSUBPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDSUBPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "MULPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "MULPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "MULSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "MULSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "SUBPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "SUBPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "SUBSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "SUBSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDPDYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDPSYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSUBPDYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSUBPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSUBPSYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSUBPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213PDYr")>; -def: InstRW<[SKLWriteResGroup87], 
(instregex "VFMADD213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213PDr")>; 
-def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213PDr")>; -def: 
InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULPDYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex 
"VMULPSYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBPDYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBPSYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDSUBPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDSUBPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "MULPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "MULPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "MULSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "MULSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "SUBPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "SUBPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "SUBSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "SUBSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDPDYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDPSYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPDYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPSYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex 
"VADDSUBPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PSr")>; +def: 
InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PSr")>; +def: InstRW<[SKLWriteResGroup48], 
(instregex "VFNMADD132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PDYr")>; +def: 
InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULPDYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULPSYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPDYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPSYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBSSrr")>; -def SKLWriteResGroup89 : SchedWriteRes<[SKLPort015]> { +def SKLWriteResGroup49 : SchedWriteRes<[SKLPort015]> { let Latency = 4; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup89], (instregex "CMPPDrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CMPPSrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CMPSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CVTDQ2PSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CVTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CVTTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MAXPDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MAXPSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MAXSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MAXSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MINPDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MINPSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex 
"MINSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MINSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PHMINPOSUWrr128")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMADDUBSWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMADDWDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULDQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULHRSWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULHUWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULHWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULLWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULUDQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPPDYrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPPDrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPPSYrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPPSrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTDQ2PSYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTDQ2PSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPS2DQYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTTPS2DQYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXPDYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXPDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXPSYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXPSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINPDYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINPDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINPSYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINPSrr")>; -def: 
InstRW<[SKLWriteResGroup89], (instregex "VMINSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPHMINPOSUWrr128")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMADDUBSWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMADDUBSWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMADDWDYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMADDWDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULDQYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULDQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHRSWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHRSWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHUWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHUWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULLWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULLWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULUDQYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULUDQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CMPPDrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CMPPSrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CMPSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CVTDQ2PSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CVTPS2DQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CVTTPS2DQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAXPDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAXPSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAXSDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAXSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MINPDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MINPSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MINSDrr")>; +def: InstRW<[SKLWriteResGroup49], 
(instregex "MINSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PHMINPOSUWrr128")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMADDUBSWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMADDWDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULDQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULHRSWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULHUWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULHWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULLWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULUDQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPPDYrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPPDrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPPSYrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPPSrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPSDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTDQ2PSYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTDQ2PSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTPS2DQYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTPS2DQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTTPS2DQYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTTPS2DQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPDYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPSYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXSDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINPDYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINPDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINPSYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINPSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINSDrr")>; +def: 
InstRW<[SKLWriteResGroup49], (instregex "VMINSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPHMINPOSUWrr128")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDUBSWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDUBSWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDWDYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDWDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULDQYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULDQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHRSWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHRSWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHUWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHUWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULLWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULLWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULUDQYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULUDQrr")>; -def SKLWriteResGroup90 : SchedWriteRes<[SKLPort5]> { +def SKLWriteResGroup50 : SchedWriteRes<[SKLPort5]> { let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup90], (instregex "MPSADBWrri")>; -def: InstRW<[SKLWriteResGroup90], (instregex "VMPSADBWYrri")>; -def: InstRW<[SKLWriteResGroup90], (instregex "VMPSADBWrri")>; - -def SKLWriteResGroup91 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup91], (instregex "AESDECLASTrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "AESDECrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "AESENCLASTrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "AESENCrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_CVTPI2PSirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex 
"MMX_PMADDUBSWrm64")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMADDWDirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULHRSWrm64")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULHUWirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULHWirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULLWirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULUDQirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MUL_F32m")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MUL_F64m")>; -def: InstRW<[SKLWriteResGroup91], (instregex "RCPPSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "RCPSSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "RSQRTPSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "RSQRTSSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VAESDECLASTrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VAESDECrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VAESENCLASTrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VAESENCrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRCPPSYm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRCPPSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRCPSSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRSQRTPSYm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRSQRTPSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRSQRTSSm")>; +def: InstRW<[SKLWriteResGroup50], (instregex "MPSADBWrri")>; +def: InstRW<[SKLWriteResGroup50], (instregex "VMPSADBWYrri")>; +def: InstRW<[SKLWriteResGroup50], (instregex "VMPSADBWrri")>; -def SKLWriteResGroup92 : SchedWriteRes<[SKLPort1,SKLPort5]> { +def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> { let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup92], (instregex "IMUL64r")>; -def: InstRW<[SKLWriteResGroup92], (instregex "MUL64r")>; -def: InstRW<[SKLWriteResGroup92], (instregex "MULX64rr")>; +def: InstRW<[SKLWriteResGroup51], (instregex 
"IMUL64r")>; +def: InstRW<[SKLWriteResGroup51], (instregex "MUL64r")>; +def: InstRW<[SKLWriteResGroup51], (instregex "MULX64rr")>; -def SKLWriteResGroup92_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 4; - let NumMicroOps = 4; + let NumMicroOps = 4; } -def: InstRW<[SKLWriteResGroup92_16], (instregex "IMUL16r")>; -def: InstRW<[SKLWriteResGroup92_16], (instregex "MUL16r")>; +def: InstRW<[SKLWriteResGroup51_16], (instregex "IMUL16r")>; +def: InstRW<[SKLWriteResGroup51_16], (instregex "MUL16r")>; -def SKLWriteResGroup93 : SchedWriteRes<[SKLPort5,SKLPort01]> { +def SKLWriteResGroup52 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup93], (instregex "VPSLLDYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSLLQYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSLLWYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRADYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRAWYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRLDYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRLQYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRLWYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSLLDYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSLLQYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSLLWYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRADYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRAWYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRLDYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRLQYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRLWYrr")>; -def SKLWriteResGroup94 : SchedWriteRes<[SKLPort01,SKLPort23]> { +def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let NumMicroOps 
= 3; + let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup94], (instregex "ADDPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDSUBPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDSUBPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "MULPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "MULPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "MULSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "MULSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "SUBPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "SUBPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "SUBSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "SUBSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDPDYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDPSYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSUBPDYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSUBPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSUBPSYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSUBPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex 
"VFMADD213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213PDm")>; -def: 
InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213PDm")>; -def: 
InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULPDYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex 
"VMULPSYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBPDYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBPSYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBSSrm")>; +def: InstRW<[SKLWriteResGroup53], (instregex "ISTT_FP16m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "ISTT_FP32m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "ISTT_FP64m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_F16m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_F32m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_FP16m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_FP32m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_FP64m")>; -def SKLWriteResGroup96 : SchedWriteRes<[SKLPort23,SKLPort015]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup96], (instregex "CMPPDrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CMPPSrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CMPSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTDQ2PSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTPS2PDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTSS2SDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MAXPDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MAXPSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MAXSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MAXSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MINPDrm")>; -def: 
InstRW<[SKLWriteResGroup96], (instregex "MINPSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MINSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MINSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MMX_CVTPS2PIirm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MMX_CVTTPS2PIirm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PHMINPOSUWrm128")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMADDUBSWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMADDWDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULDQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULHRSWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULHUWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULHWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULLWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULUDQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPPDYrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPPDrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPPSYrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPPSrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTDQ2PSYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTDQ2PSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPH2PSYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPH2PSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPS2DQYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPS2PDYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPS2PDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTSS2SDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTTPS2DQYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXPDYrm")>; -def: 
InstRW<[SKLWriteResGroup96], (instregex "VMAXPDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXPSYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXPSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINPDYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINPDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINPSYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINPSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPHMINPOSUWrm128")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMADDUBSWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMADDUBSWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMADDWDYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMADDWDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULDQYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULDQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHRSWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHRSWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHUWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHUWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULLWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULLWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULUDQYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULUDQrm")>; - -def SKLWriteResGroup97 : SchedWriteRes<[SKLPort5,SKLPort23]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SKLWriteResGroup97], (instregex "FICOM16m")>; -def: InstRW<[SKLWriteResGroup97], (instregex "FICOM32m")>; -def: 
InstRW<[SKLWriteResGroup97], (instregex "FICOMP16m")>; -def: InstRW<[SKLWriteResGroup97], (instregex "FICOMP32m")>; -def: InstRW<[SKLWriteResGroup97], (instregex "MPSADBWrmi")>; -def: InstRW<[SKLWriteResGroup97], (instregex "VMPSADBWYrmi")>; -def: InstRW<[SKLWriteResGroup97], (instregex "VMPSADBWrmi")>; - -def SKLWriteResGroup98 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup98], (instregex "MULX64rm")>; - -def SKLWriteResGroup100 : SchedWriteRes<[SKLPort0156]> { +def SKLWriteResGroup54 : SchedWriteRes<[SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [4]; } -def: InstRW<[SKLWriteResGroup100], (instregex "FNCLEX")>; +def: InstRW<[SKLWriteResGroup54], (instregex "FNCLEX")>; -def SKLWriteResGroup101 : SchedWriteRes<[SKLPort6,SKLPort0156]> { +def SKLWriteResGroup55 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup101], (instregex "PAUSE")>; +def: InstRW<[SKLWriteResGroup55], (instregex "PAUSE")>; -def SKLWriteResGroup102 : SchedWriteRes<[SKLPort015,SKLPort0156]> { +def SKLWriteResGroup56 : SchedWriteRes<[SKLPort015,SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup102], (instregex "VZEROUPPER")>; +def: InstRW<[SKLWriteResGroup56], (instregex "VZEROUPPER")>; -def SKLWriteResGroup103 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> { +def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1,1,2]; } -def: InstRW<[SKLWriteResGroup103], (instregex "LAR(16|32|64)rr")>; - -def SKLWriteResGroup105 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup105], (instregex "SHLD(16|32|64)mri8")>; -def: 
InstRW<[SKLWriteResGroup105], (instregex "SHRD(16|32|64)mri8")>; - -def SKLWriteResGroup106 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SKLWriteResGroup106], (instregex "LAR(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup106], (instregex "LSL(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def SKLWriteResGroup107 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 6; - let ResourceCycles = [1,1,4]; +def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup107], (instregex "PUSHF16")>; -def: InstRW<[SKLWriteResGroup107], (instregex "PUSHF64")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MMX_MOVD64rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MMX_MOVD64to64rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MMX_MOVQ64rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOV(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOV64toPQIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOV8rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVDDUPrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVDI2PDIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSSrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm16")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm32")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm8")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVZX(16|32|64)rm16")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVZX(16|32|64)rm8")>; +def: InstRW<[SKLWriteResGroup58], (instregex "PREFETCHNTA")>; +def: InstRW<[SKLWriteResGroup58], (instregex "PREFETCHT0")>; +def: InstRW<[SKLWriteResGroup58], (instregex 
"PREFETCHT1")>; +def: InstRW<[SKLWriteResGroup58], (instregex "PREFETCHT2")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOV64toPQIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVDDUPrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVDI2PDIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVQI2PQIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVSDrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVSSrm")>; -def SKLWriteResGroup109 : SchedWriteRes<[SKLPort0,SKLPort5]> { +def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 5; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup109], (instregex "CVTDQ2PDrr")>; -def: InstRW<[SKLWriteResGroup109], (instregex "MMX_CVTPI2PDirr")>; -def: InstRW<[SKLWriteResGroup109], (instregex "VCVTDQ2PDrr")>; +def: InstRW<[SKLWriteResGroup59], (instregex "CVTDQ2PDrr")>; +def: InstRW<[SKLWriteResGroup59], (instregex "MMX_CVTPI2PDirr")>; +def: InstRW<[SKLWriteResGroup59], (instregex "VCVTDQ2PDrr")>; -def SKLWriteResGroup110 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 5; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup110], (instregex "CVTPD2DQrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTPD2PSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTPS2PDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSD2SSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSI2SD64rr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSI2SDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSI2SSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSS2SDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTTPD2DQrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "MMX_CVTPD2PIirr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "MMX_CVTPS2PIirr")>; -def: InstRW<[SKLWriteResGroup110], (instregex 
"MMX_CVTTPD2PIirr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "MMX_CVTTPS2PIirr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPD2DQrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPD2PSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPH2PSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPS2PDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPS2PHrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSD2SSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSI2SD64rr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSI2SDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSI2SSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSS2SDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTTPD2DQrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTPD2DQrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTPD2PSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTPS2PDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSD2SSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SD64rr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSS2SDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTTPD2DQrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVTPD2PIirr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVTPS2PIirr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVTTPD2PIirr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVTTPS2PIirr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPD2DQrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPD2PSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPH2PSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPS2PDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPS2PHrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex 
"VCVTSD2SSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSS2SDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTTPD2DQrr")>; -def SKLWriteResGroup113 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { +def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> { let Latency = 5; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup113], (instregex "CVTDQ2PDrm")>; -def: InstRW<[SKLWriteResGroup113], (instregex "MMX_CVTPI2PDirm")>; -def: InstRW<[SKLWriteResGroup113], (instregex "VCVTDQ2PDrm")>; +def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>; -def SKLWriteResGroup114 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> { +def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 5; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup114], (instregex "STR(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup62], (instregex "IMUL32r")>; +def: InstRW<[SKLWriteResGroup62], (instregex "MUL32r")>; +def: InstRW<[SKLWriteResGroup62], (instregex "MULX32rr")>; -def SKLWriteResGroup115 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let NumMicroOps = 5; + let ResourceCycles = [1,4]; } -def: InstRW<[SKLWriteResGroup115], (instregex "IMUL32r")>; -def: InstRW<[SKLWriteResGroup115], (instregex "MUL32r")>; -def: InstRW<[SKLWriteResGroup115], (instregex "MULX32rr")>; +def: InstRW<[SKLWriteResGroup63], (instregex "XSETBV")>; -def SKLWriteResGroup116 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { +def SKLWriteResGroup64 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = 
[1,1,1]; + let NumMicroOps = 5; + let ResourceCycles = [2,3]; } -def: InstRW<[SKLWriteResGroup116], (instregex "CVTPD2DQrm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "CVTPD2PSrm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "CVTSD2SSrm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "CVTTPD2DQrm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "MMX_CVTPD2PIirm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "MMX_CVTTPD2PIirm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "VCVTSD2SSrm")>; +def: InstRW<[SKLWriteResGroup64], (instregex "CMPXCHG(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup64], (instregex "CMPXCHG8rr")>; -def SKLWriteResGroup118 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup65 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; + let NumMicroOps = 6; + let ResourceCycles = [1,1,4]; } -def: InstRW<[SKLWriteResGroup118], (instregex "MULX32rm")>; +def: InstRW<[SKLWriteResGroup65], (instregex "PUSHF16")>; +def: InstRW<[SKLWriteResGroup65], (instregex "PUSHF64")>; -def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort015]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; +def SKLWriteResGroup66 : SchedWriteRes<[SKLPort5]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup119], (instregex "VCVTPS2PHmr")>; +def: InstRW<[SKLWriteResGroup66], (instregex "PCLMULQDQrr")>; +def: InstRW<[SKLWriteResGroup66], (instregex "VPCLMULQDQrr")>; -def SKLWriteResGroup120 : SchedWriteRes<[SKLPort06,SKLPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [1,4]; +def SKLWriteResGroup67 : SchedWriteRes<[SKLPort23]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup120], (instregex "XSETBV")>; +def: InstRW<[SKLWriteResGroup67], (instregex "LDDQUrm")>; +def: 
InstRW<[SKLWriteResGroup67], (instregex "MOVAPDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVAPSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVDQArm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVDQUrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVNTDQArm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVSHDUPrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVSLDUPrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVUPDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVUPSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VBROADCASTSSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VLDDQUrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVAPDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVAPSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVDQArm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVDQUrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVNTDQArm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVSHDUPrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVSLDUPrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVUPDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVUPSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VPBROADCASTDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VPBROADCASTQrm")>; -def SKLWriteResGroup121 : SchedWriteRes<[SKLPort06,SKLPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [2,3]; +def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup121], (instregex "CMPXCHG(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup121], (instregex "CMPXCHG8rr")>; +def: InstRW<[SKLWriteResGroup68], (instregex "MMX_CVTPI2PSirr")>; -def SKLWriteResGroup122 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 5; - let NumMicroOps = 8; - let ResourceCycles = 
[1,1,1,1,1,3]; +def SKLWriteResGroup69 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup122], (instregex "ADD8mi")>; -def: InstRW<[SKLWriteResGroup122], (instregex "AND8mi")>; -def: InstRW<[SKLWriteResGroup122], (instregex "OR8mi")>; -def: InstRW<[SKLWriteResGroup122], (instregex "SUB8mi")>; -def: InstRW<[SKLWriteResGroup122], (instregex "XCHG(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "XCHG8rm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "XOR8mi")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDSBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDUSBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDUSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PAVGBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PAVGWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPEQBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPEQDirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPEQWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPGTBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPGTDirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPGTWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PMAXSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PMAXUBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PMINSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PMINUBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSLLDrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSLLQrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSLLWrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRADrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRAWrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRLDrm")>; 
+def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRLQrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRLWrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSUBSBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSUBSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSUBUSBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSUBUSWirm")>; -def SKLWriteResGroup123 : SchedWriteRes<[SKLPort5]> { +def SKLWriteResGroup70 : SchedWriteRes<[SKLPort0,SKLPort015]> { let Latency = 6; - let NumMicroOps = 1; - let ResourceCycles = [1]; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup123], (instregex "PCLMULQDQrr")>; -def: InstRW<[SKLWriteResGroup123], (instregex "VPCLMULQDQrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTSD2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTSD2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTSS2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTSS2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTTSD2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTTSD2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTSD2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTSD2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTSS2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTSS2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTTSD2SIrr")>; -def SKLWriteResGroup124 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup71 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 6; let NumMicroOps = 2; - let ResourceCycles = [2]; + let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup124], (instregex "MMX_CVTPI2PSirr")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PINSRWirmi")>; +def: InstRW<[SKLWriteResGroup71], (instregex 
"MMX_PSHUFBrm64")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PSHUFWmi")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKHBWirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKHDQirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKHWDirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKLBWirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKLDQirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKLWDirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MOVHPDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MOVHPSrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MOVLPDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MOVLPSrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PINSRBrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PINSRDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PINSRQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PINSRWrmi")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXBDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXBQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXBWrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXDQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXWDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXWQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXBDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXBQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXBWrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXDQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXWDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXWQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VMOVHPDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VMOVHPSrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VMOVLPDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VMOVLPSrm")>; +def: InstRW<[SKLWriteResGroup71], 
(instregex "VPINSRBrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPINSRDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPINSRQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPINSRWrmi")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXBDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXBQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXBWrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXDQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXWDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXWQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXBDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXBQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXBWrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXDQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXWDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXWQrm")>; -def SKLWriteResGroup125 : SchedWriteRes<[SKLPort0,SKLPort015]> { +def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup125], (instregex "CVTSD2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTSD2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTSS2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTSS2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTTSD2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTTSD2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTSD2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTSD2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTSS2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTSS2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTTSD2SIrr")>; +def: InstRW<[SKLWriteResGroup72], (instregex 
"FARJMP64")>; +def: InstRW<[SKLWriteResGroup72], (instregex "JMP(16|32|64)m")>; -def SKLWriteResGroup126 : SchedWriteRes<[SKLPort5,SKLPort23]> { +def SKLWriteResGroup73 : SchedWriteRes<[SKLPort23,SKLPort05]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup126], (instregex "PCLMULQDQrm")>; -def: InstRW<[SKLWriteResGroup126], (instregex "VPCLMULQDQrm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABSBrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABSDrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABSWrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PADDBirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PADDDirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PADDQirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PADDWirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PANDNirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PANDirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PORirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSIGNBrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSIGNDrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSIGNWrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSUBBirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSUBDirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSUBQirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSUBWirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PXORirm")>; -def SKLWriteResGroup127 : SchedWriteRes<[SKLPort5,SKLPort01]> { +def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> { let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup127], (instregex "HADDPDrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "HADDPSrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex 
"HSUBPDrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "HSUBPSrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHADDPDYrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHADDPDrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHADDPSYrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHADDPSrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHSUBPDYrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHSUBPDrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHSUBPSYrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHSUBPSrr")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADC(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADC8rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADCX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADCX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADOX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADOX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "BT(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVAE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVB(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVG(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVGE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVL(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVLE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNO(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNP(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNS(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVO(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVP(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVS(16|32|64)rm")>; +def: 
InstRW<[SKLWriteResGroup74], (instregex "RORX32mi")>; +def: InstRW<[SKLWriteResGroup74], (instregex "RORX64mi")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SARX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SARX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SBB(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SBB8rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SHLX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SHLX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SHRX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SHRX64rm")>; -def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort23]> { +def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> { let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup75], (instregex "ANDN32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "ANDN64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSI32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSI64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSMSK32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSMSK64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSR32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSR64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BZHI32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BZHI64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "MOVBE(16|32|64)rm")>; + +def SKLWriteResGroup76 : SchedWriteRes<[SKLPort23,SKLPort0156]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup128], (instregex "ADD_FI16m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "ADD_FI32m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "SUBR_FI16m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "SUBR_FI32m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "SUB_FI16m")>; -def: 
InstRW<[SKLWriteResGroup128], (instregex "SUB_FI32m")>; +def: InstRW<[SKLWriteResGroup76], (instregex "ADD(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "ADD8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "AND(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "AND8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP8mi")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP8mr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "OR(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "OR8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)rmr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "SUB(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "SUB8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "TEST(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "TEST8mi")>; +def: InstRW<[SKLWriteResGroup76], (instregex "TEST8mr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "XOR(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "XOR8rm")>; -def SKLWriteResGroup129 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup77 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup129], (instregex "CVTSI2SS64rr")>; -def: InstRW<[SKLWriteResGroup129], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "HADDPDrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "HADDPSrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "HSUBPDrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "HSUBPSrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex 
"VHADDPDYrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHADDPDrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHADDPSYrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHADDPSrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHSUBPDYrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHSUBPDrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHSUBPSYrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHSUBPSrr")>; -def SKLWriteResGroup130 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort015]> { +def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 6; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup130], (instregex "CVTSD2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTSD2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTSS2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTSS2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTTSD2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTTSD2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTTSS2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTSD2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTSD2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTSS2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTSS2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTTSD2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTTSD2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTTSS2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTTSS2SIrm")>; +def: InstRW<[SKLWriteResGroup78], (instregex "CVTSI2SS64rr")>; +def: InstRW<[SKLWriteResGroup78], (instregex "VCVTSI2SS64rr")>; -def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup79 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 6; let NumMicroOps = 4; let 
ResourceCycles = [1,2,1]; } -def: InstRW<[SKLWriteResGroup131], (instregex "SHLD(16|32|64)rrCL")>; -def: InstRW<[SKLWriteResGroup131], (instregex "SHRD(16|32|64)rrCL")>; +def: InstRW<[SKLWriteResGroup79], (instregex "SHLD(16|32|64)rrCL")>; +def: InstRW<[SKLWriteResGroup79], (instregex "SHRD(16|32|64)rrCL")>; -def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { +def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 6; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup133], (instregex "HADDPDrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "HADDPSrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "HSUBPDrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "HSUBPSrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHADDPDYrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHADDPDrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHADDPSYrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHADDPSrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHSUBPDYrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHSUBPDrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHSUBPSYrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHSUBPSrm")>; +def: InstRW<[SKLWriteResGroup80], (instregex "SLDT(16|32|64)r")>; -def SKLWriteResGroup134 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup81 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort015]> { let Latency = 6; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup134], (instregex "SLDT(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup81], (instregex "VCVTPS2PHmr")>; -def SKLWriteResGroup136 : SchedWriteRes<[SKLPort6,SKLPort0156]> { +def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { let Latency = 6; - let NumMicroOps = 6; - let ResourceCycles = [1,5]; + let 
NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup136], (instregex "STD")>; +def: InstRW<[SKLWriteResGroup82], (instregex "BTC(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup82], (instregex "BTR(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup82], (instregex "BTS(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SAR(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SAR(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SAR8m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SAR8mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHL(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHL(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHL8m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHL8mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHR(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHR(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHR8m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHR8mi")>; -def SKLWriteResGroup137 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKLWriteResGroup83], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "ADD(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "ADD8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "ADD8mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND8mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "DEC(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "DEC8m")>; +def: InstRW<[SKLWriteResGroup83], (instregex 
"INC(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "INC8m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "NEG(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "NEG8m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "NOT(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "NOT8m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR8mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "POP(16|32|64)rmm")>; +def: InstRW<[SKLWriteResGroup83], (instregex "PUSH(16|32|64)rmm")>; +def: InstRW<[SKLWriteResGroup83], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "SUB(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "SUB8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "SUB8mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR8mr")>; + +def SKLWriteResGroup84 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 6; let NumMicroOps = 6; - let ResourceCycles = [1,1,1,2,1]; + let ResourceCycles = [1,5]; +} +def: InstRW<[SKLWriteResGroup84], (instregex "STD")>; + +def SKLWriteResGroup85 : SchedWriteRes<[SKLPort23]> { + let Latency = 7; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup137], (instregex "SHLD(16|32|64)mrCL")>; -def: InstRW<[SKLWriteResGroup137], (instregex "SHRD(16|32|64)mrCL")>; +def: InstRW<[SKLWriteResGroup85], (instregex "LD_F32m")>; +def: InstRW<[SKLWriteResGroup85], (instregex "LD_F64m")>; +def: InstRW<[SKLWriteResGroup85], (instregex "LD_F80m")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VBROADCASTF128")>; +def: InstRW<[SKLWriteResGroup85], (instregex 
"VBROADCASTI128")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VBROADCASTSDYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VBROADCASTSSYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VLDDQUYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVAPDYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVAPSYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVDDUPYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVDQAYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVDQUYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVNTDQAYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVSHDUPYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVSLDUPYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVUPDYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVUPSYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VPBROADCASTDYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VPBROADCASTQYrm")>; -def SKLWriteResGroup142 : SchedWriteRes<[SKLPort0,SKLPort5]> { +def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup142], (instregex "VCVTDQ2PDYrr")>; +def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>; -def SKLWriteResGroup143 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup87 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPD2DQYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPD2PSYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPH2PSYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPS2PDYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPS2PHYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTTPD2DQYrr")>; +def: InstRW<[SKLWriteResGroup87], (instregex "COMISDrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex 
"COMISSrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "UCOMISDrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "UCOMISSrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "VCOMISDrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "VCOMISSrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "VUCOMISDrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "VUCOMISSrm")>; + +def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup88], (instregex "INSERTPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PACKSSDWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PACKSSWBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PACKUSDWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PACKUSWBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PALIGNRrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PBLENDWrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PSHUFBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PSHUFDmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PSHUFHWmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PSHUFLWmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKHBWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKHDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKHQDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKHWDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKLBWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKLDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKLQDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKLWDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "SHUFPDrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "SHUFPSrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "UNPCKHPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "UNPCKHPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex 
"UNPCKLPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "UNPCKLPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VINSERTPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPACKSSDWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPACKSSWBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPACKUSDWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPACKUSWBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPALIGNRrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPBLENDWrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPBROADCASTBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPBROADCASTWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPERMILPDmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPERMILPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPERMILPSmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPERMILPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPSHUFBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPSHUFDmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPSHUFHWmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPSHUFLWmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKHBWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKHDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKHWDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKLDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKLQDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKLWDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VSHUFPDrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VSHUFPSrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VUNPCKHPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VUNPCKHPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VUNPCKLPDrm")>; +def: 
InstRW<[SKLWriteResGroup88], (instregex "VUNPCKLPSrm")>; + +def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2DQYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2PSYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPH2PSYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPS2PDYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPS2PHYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTTPD2DQYrr")>; + +def SKLWriteResGroup90 : SchedWriteRes<[SKLPort01,SKLPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup90], (instregex "PABSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PABSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PABSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PADDSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PADDSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PADDUSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PADDUSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PAVGBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PAVGWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPEQBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPEQDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPEQQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPEQWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPGTBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPGTDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPGTWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXUBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXUDrm")>; +def: 
InstRW<[SKLWriteResGroup90], (instregex "PMAXUWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINUBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINUDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINUWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSIGNBrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSIGNDrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSIGNWrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSLLDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSLLQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSLLWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRADrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRAWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRLDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRLQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRLWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSUBSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSUBSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSUBUSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSUBUSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPABSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPABSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPABSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPADDSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPADDSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPADDUSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPADDUSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPAVGBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPAVGWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPEQBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPEQDrm")>; +def: 
InstRW<[SKLWriteResGroup90], (instregex "VPCMPEQQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPEQWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPGTBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPGTDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPGTWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXUBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXUDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXUWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINUBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINUDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINUWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSIGNBrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSIGNDrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSIGNWrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLVDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLVQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRADrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRAVDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRAWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLVDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLVQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex 
"VPSRLWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSUBSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSUBSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSUBUSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSUBUSWrm")>; + +def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup91], (instregex "ANDNPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ANDNPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ANDPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ANDPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "BLENDPDrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex "BLENDPSrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ORPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ORPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PADDBrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PADDDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PADDQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PADDWrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PANDNrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PANDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PORrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PSUBBrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PSUBDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PSUBQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PSUBWrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PXORrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VANDNPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VANDNPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VANDPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VANDPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VBLENDPDrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VBLENDPSrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex 
"VINSERTF128rm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VINSERTI128rm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VMASKMOVPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VMASKMOVPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VORPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VORPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPADDBrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPADDDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPADDQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPADDWrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPANDNrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPANDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPBLENDDrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPMASKMOVDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPMASKMOVQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPORrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPSUBBrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPSUBDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPSUBQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPSUBWrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPXORrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VXORPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VXORPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "XORPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "XORPSrm")>; + +def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKSSDWirm")>; +def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKSSWBirm")>; +def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKUSWBirm")>; -def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { +def SKLWriteResGroup93 : SchedWriteRes<[SKLPort23,SKLPort06]> { + let Latency = 7; + let NumMicroOps = 3; + 
let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup93], (instregex "CMOVA(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup93], (instregex "CMOVBE(16|32|64)rm")>; + +def SKLWriteResGroup94 : SchedWriteRes<[SKLPort23,SKLPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup94], (instregex "LEAVE64")>; +def: InstRW<[SKLWriteResGroup94], (instregex "SCASB")>; +def: InstRW<[SKLWriteResGroup94], (instregex "SCASL")>; +def: InstRW<[SKLWriteResGroup94], (instregex "SCASQ")>; +def: InstRW<[SKLWriteResGroup94], (instregex "SCASW")>; + +def SKLWriteResGroup95 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup145], (instregex "MUL_FI16m")>; -def: InstRW<[SKLWriteResGroup145], (instregex "MUL_FI32m")>; -def: InstRW<[SKLWriteResGroup145], (instregex "VCVTDQ2PDYrm")>; +def: InstRW<[SKLWriteResGroup95], (instregex "CVTTSS2SI64rr")>; +def: InstRW<[SKLWriteResGroup95], (instregex "CVTTSS2SIrr")>; +def: InstRW<[SKLWriteResGroup95], (instregex "VCVTTSS2SI64rr")>; +def: InstRW<[SKLWriteResGroup95], (instregex "VCVTTSS2SIrr")>; -def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015]> { +def SKLWriteResGroup96 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup146], (instregex "CVTTSS2SI64rr")>; -def: InstRW<[SKLWriteResGroup146], (instregex "CVTTSS2SIrr")>; -def: InstRW<[SKLWriteResGroup146], (instregex "VCVTTSS2SI64rr")>; -def: InstRW<[SKLWriteResGroup146], (instregex "VCVTTSS2SIrr")>; +def: InstRW<[SKLWriteResGroup96], (instregex "FLDCW16m")>; -def SKLWriteResGroup149 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015]> { +def SKLWriteResGroup97 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort0156]> { let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; + let NumMicroOps = 3; + let 
ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup149], (instregex "CVTTSS2SI64rm")>; +def: InstRW<[SKLWriteResGroup97], (instregex "LDMXCSR")>; +def: InstRW<[SKLWriteResGroup97], (instregex "VLDMXCSR")>; -def SKLWriteResGroup150 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort015]> { +def SKLWriteResGroup98 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> { let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup98], (instregex "LRETQ")>; +def: InstRW<[SKLWriteResGroup98], (instregex "RETQ")>; + +def SKLWriteResGroup99 : SchedWriteRes<[SKLPort23,SKLPort06,SKLPort15]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup99], (instregex "BEXTR32rm")>; +def: InstRW<[SKLWriteResGroup99], (instregex "BEXTR64rm")>; + +def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; +} +def: InstRW<[SKLWriteResGroup100], (instregex "ROL(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup100], (instregex "ROL(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup100], (instregex "ROL8m1")>; +def: InstRW<[SKLWriteResGroup100], (instregex "ROL8mi")>; +def: InstRW<[SKLWriteResGroup100], (instregex "ROR(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup100], (instregex "ROR(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup100], (instregex "ROR8m1")>; +def: InstRW<[SKLWriteResGroup100], (instregex "ROR8mi")>; + +def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; +} +def: InstRW<[SKLWriteResGroup101], (instregex "XADD(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup101], (instregex "XADD8rm")>; + +def SKLWriteResGroup102 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 7; + let 
NumMicroOps = 5; + let ResourceCycles = [1,1,1,1,1]; } -def: InstRW<[SKLWriteResGroup150], (instregex "VCVTPS2PHYmr")>; +def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup102], (instregex "FARCALL64")>; -def SKLWriteResGroup151 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> { +def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 7; let NumMicroOps = 7; let ResourceCycles = [1,3,1,2]; } -def: InstRW<[SKLWriteResGroup151], (instregex "LOOP")>; +def: InstRW<[SKLWriteResGroup103], (instregex "LOOP")>; -def SKLWriteResGroup156 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup104 : SchedWriteRes<[SKLPort0]> { let Latency = 8; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup156], (instregex "AESIMCrr")>; -def: InstRW<[SKLWriteResGroup156], (instregex "VAESIMCrr")>; +def: InstRW<[SKLWriteResGroup104], (instregex "AESIMCrr")>; +def: InstRW<[SKLWriteResGroup104], (instregex "VAESIMCrr")>; -def SKLWriteResGroup157 : SchedWriteRes<[SKLPort015]> { +def SKLWriteResGroup105 : SchedWriteRes<[SKLPort015]> { let Latency = 8; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup157], (instregex "PMULLDrr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "ROUNDPDr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "ROUNDPSr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "ROUNDSDr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "ROUNDSSr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "VPMULLDYrr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "VPMULLDrr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDPDr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDPSr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDSDr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDSSr")>; -def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDYPDr")>; -def: InstRW<[SKLWriteResGroup157], (instregex 
"VROUNDYPSr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "PMULLDrr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPDr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPSr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDSDr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDSSr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "VPMULLDYrr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "VPMULLDrr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDPDr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDPSr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDSDr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDSSr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDYPDr")>; +def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDYPSr")>; + +def SKLWriteResGroup106 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup106], (instregex "VTESTPDrm")>; +def: InstRW<[SKLWriteResGroup106], (instregex "VTESTPSrm")>; + +def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup107], (instregex "BSF(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "BSR(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "IMUL64m")>; +def: InstRW<[SKLWriteResGroup107], (instregex "IMUL(32|64)rm(i8?)")>; +def: InstRW<[SKLWriteResGroup107], (instregex "IMUL8m")>; +def: InstRW<[SKLWriteResGroup107], (instregex "LZCNT(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "MUL(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup107], (instregex "MUL8m")>; +def: InstRW<[SKLWriteResGroup107], (instregex "PDEP32rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "PDEP64rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "PEXT32rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "PEXT64rm")>; +def: 
InstRW<[SKLWriteResGroup107], (instregex "POPCNT(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "TZCNT(16|32|64)rm")>; + +def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup107_16], (instregex "IMUL16rm(i8?)")>; + +def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { + let Latency = 3; + let NumMicroOps = 5; +} +def: InstRW<[SKLWriteResGroup107_16_2], (instregex "IMUL16m")>; +def: InstRW<[SKLWriteResGroup107_16_2], (instregex "MUL16m")>; + +def SKLWriteResGroup107_32 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup107_32], (instregex "IMUL32m")>; +def: InstRW<[SKLWriteResGroup107_32], (instregex "MUL32m")>; + +def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m")>; +def: InstRW<[SKLWriteResGroup108], (instregex "FCOM64m")>; +def: InstRW<[SKLWriteResGroup108], (instregex "FCOMP32m")>; +def: InstRW<[SKLWriteResGroup108], (instregex "FCOMP64m")>; +def: InstRW<[SKLWriteResGroup108], (instregex "MMX_PSADBWirm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPACKSSDWYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPACKSSWBYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPACKUSDWYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPACKUSWBYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPALIGNRYrmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPBLENDWYrmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPBROADCASTBYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPBROADCASTWYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPERMILPDYmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex 
"VPERMILPDYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPERMILPSYmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPERMILPSYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPMOVSXBDYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPMOVSXBQYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPMOVSXWQYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPSHUFBYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPSHUFDYmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPSHUFHWYmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPSHUFLWYmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKHBWYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKHDQYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKHQDQYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKHWDYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKLBWYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKLDQYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKLQDQYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKLWDYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VSHUFPDYrmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VSHUFPSYrmi")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VUNPCKHPDYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VUNPCKHPSYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VUNPCKLPDYrm")>; +def: InstRW<[SKLWriteResGroup108], (instregex "VUNPCKLPSYrm")>; + +def SKLWriteResGroup109 : SchedWriteRes<[SKLPort01,SKLPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup109], (instregex "VPABSBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPABSDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPABSWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPADDSBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPADDSWYrm")>; +def: 
InstRW<[SKLWriteResGroup109], (instregex "VPADDUSBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPADDUSWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPAVGBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPAVGWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPEQBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPEQDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPEQQYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPEQWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPGTBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPGTDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPGTWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXSBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXSDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXSWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXUBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXUDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXUWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMINSBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMINSDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMINSWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMINUBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMINUDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPMINUWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSIGNBYrm256")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSIGNDYrm256")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSIGNWYrm256")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLQYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLVDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLVQYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLWYrm")>; +def: InstRW<[SKLWriteResGroup109], 
(instregex "VPSRADYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSRAVDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSRAWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLQYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLVDYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLVQYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSUBSBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSUBSWYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSUBUSBYrm")>; +def: InstRW<[SKLWriteResGroup109], (instregex "VPSUBUSWYrm")>; + +def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPDYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPSYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VANDPDYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VANDPSYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPDYrmi")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPSYrmi")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPSYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VORPDYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VORPSYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPADDBYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPADDDYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPADDQYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPADDWYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPANDNYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPANDYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPBLENDDYrmi")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPMASKMOVDYrm")>; 
+def: InstRW<[SKLWriteResGroup110], (instregex "VPMASKMOVQYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPORYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPSUBBYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPSUBDYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPSUBQYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPSUBWYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VPXORYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VXORPDYrm")>; +def: InstRW<[SKLWriteResGroup110], (instregex "VXORPSYrm")>; + +def SKLWriteResGroup111 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup111], (instregex "BLENDVPDrm0")>; +def: InstRW<[SKLWriteResGroup111], (instregex "BLENDVPSrm0")>; +def: InstRW<[SKLWriteResGroup111], (instregex "PBLENDVBrm0")>; +def: InstRW<[SKLWriteResGroup111], (instregex "VBLENDVPDrm")>; +def: InstRW<[SKLWriteResGroup111], (instregex "VBLENDVPSrm")>; +def: InstRW<[SKLWriteResGroup111], (instregex "VPBLENDVBYrm")>; +def: InstRW<[SKLWriteResGroup111], (instregex "VPBLENDVBrm")>; + +def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PHADDSWrm64")>; +def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PHSUBSWrm64")>; + +def SKLWriteResGroup113 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort05]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PHADDWrm64")>; +def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PHADDrm64")>; +def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PHSUBDrm64")>; +def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PHSUBWrm64")>; + +def SKLWriteResGroup114 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort015]> { + let Latency = 8; + let NumMicroOps = 4; + let 
ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKLWriteResGroup114], (instregex "VCVTPS2PHYmr")>; + +def SKLWriteResGroup115 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,1,3]; +} +def: InstRW<[SKLWriteResGroup115], (instregex "ROR(16|32|64)mCL")>; +def: InstRW<[SKLWriteResGroup115], (instregex "ROR8mCL")>; + +def SKLWriteResGroup116 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; +} +def: InstRW<[SKLWriteResGroup116], (instregex "RCL(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup116], (instregex "RCL(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup116], (instregex "RCL8m1")>; +def: InstRW<[SKLWriteResGroup116], (instregex "RCL8mi")>; +def: InstRW<[SKLWriteResGroup116], (instregex "RCR(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup116], (instregex "RCR(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup116], (instregex "RCR8m1")>; +def: InstRW<[SKLWriteResGroup116], (instregex "RCR8mi")>; + +def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { + let Latency = 8; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,3]; +} +def: InstRW<[SKLWriteResGroup117], (instregex "ROL(16|32|64)mCL")>; +def: InstRW<[SKLWriteResGroup117], (instregex "ROL8mCL")>; +def: InstRW<[SKLWriteResGroup117], (instregex "SAR(16|32|64)mCL")>; +def: InstRW<[SKLWriteResGroup117], (instregex "SAR8mCL")>; +def: InstRW<[SKLWriteResGroup117], (instregex "SHL(16|32|64)mCL")>; +def: InstRW<[SKLWriteResGroup117], (instregex "SHL8mCL")>; +def: InstRW<[SKLWriteResGroup117], (instregex "SHR(16|32|64)mCL")>; +def: InstRW<[SKLWriteResGroup117], (instregex "SHR8mCL")>; + +def SKLWriteResGroup118 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 8; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,3]; +} +def: InstRW<[SKLWriteResGroup118], (instregex "ADC(16|32|64)mi8")>; +def: 
InstRW<[SKLWriteResGroup118], (instregex "ADC8mi")>; + +def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 8; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,2,1]; +} +def: InstRW<[SKLWriteResGroup119], (instregex "ADC(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup119], (instregex "ADC8mr")>; +def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG8rm")>; +def: InstRW<[SKLWriteResGroup119], (instregex "SBB(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup119], (instregex "SBB(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup119], (instregex "SBB8mi")>; +def: InstRW<[SKLWriteResGroup119], (instregex "SBB8mr")>; + +def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup120], (instregex "MMX_CVTPI2PSirm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMADDUBSWrm64")>; +def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMADDWDirm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULHRSWrm64")>; +def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULHUWirm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULHWirm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULLWirm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULUDQirm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "RCPSSm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "RSQRTSSm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "VRCPSSm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "VRSQRTSSm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "VTESTPDYrm")>; +def: InstRW<[SKLWriteResGroup120], (instregex "VTESTPSYrm")>; + +def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup121], (instregex 
"PCMPGTQrm")>; +def: InstRW<[SKLWriteResGroup121], (instregex "PSADBWrm")>; +def: InstRW<[SKLWriteResGroup121], (instregex "VPCMPGTQrm")>; +def: InstRW<[SKLWriteResGroup121], (instregex "VPMOVSXBWYrm")>; +def: InstRW<[SKLWriteResGroup121], (instregex "VPMOVSXDQYrm")>; +def: InstRW<[SKLWriteResGroup121], (instregex "VPMOVSXWDYrm")>; +def: InstRW<[SKLWriteResGroup121], (instregex "VPMOVZXWDYrm")>; +def: InstRW<[SKLWriteResGroup121], (instregex "VPSADBWrm")>; + +def SKLWriteResGroup122 : SchedWriteRes<[SKLPort01,SKLPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup122], (instregex "ADDSDrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "ADDSSrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "MULSDrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "MULSSrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "SUBSDrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "SUBSSrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VADDSDrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VADDSSrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD132SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD132SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD213SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD213SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD231SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD231SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB132SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB132SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB213SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB213SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB231SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB231SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD132SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD132SSm")>; 
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD213SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD213SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD231SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD231SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB132SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB132SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB213SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB213SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB231SDm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB231SSm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VMULSDrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VMULSSrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VSUBSDrm")>; +def: InstRW<[SKLWriteResGroup122], (instregex "VSUBSSrm")>; + +def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup123], (instregex "CMPSSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "CVTPS2PDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MAXSDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MAXSSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MINSDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MINSSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTPS2PIirm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTTPS2PIirm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VCMPSDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VCMPSSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VCVTPH2PSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VCVTPS2PDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VMAXSDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VMAXSSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VMINSDrm")>; +def: 
InstRW<[SKLWriteResGroup123], (instregex "VMINSSrm")>; + +def SKLWriteResGroup124 : SchedWriteRes<[SKLPort5,SKLPort015]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup124], (instregex "DPPDrri")>; +def: InstRW<[SKLWriteResGroup124], (instregex "VDPPDrri")>; + +def SKLWriteResGroup125 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup125], (instregex "VBLENDVPDYrm")>; +def: InstRW<[SKLWriteResGroup125], (instregex "VBLENDVPSYrm")>; + +def SKLWriteResGroup126 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup126], (instregex "PTESTrm")>; +def: InstRW<[SKLWriteResGroup126], (instregex "VPTESTrm")>; + +def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup127], (instregex "MULX64rm")>; + +def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKLWriteResGroup128], (instregex "PHADDSWrm128")>; +def: InstRW<[SKLWriteResGroup128], (instregex "PHSUBSWrm128")>; +def: InstRW<[SKLWriteResGroup128], (instregex "VPHADDSWrm128")>; +def: InstRW<[SKLWriteResGroup128], (instregex "VPHSUBSWrm128")>; + +def SKLWriteResGroup129 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKLWriteResGroup129], (instregex "PHADDDrm")>; +def: InstRW<[SKLWriteResGroup129], (instregex "PHADDWrm")>; +def: InstRW<[SKLWriteResGroup129], (instregex "PHSUBDrm")>; +def: InstRW<[SKLWriteResGroup129], (instregex "PHSUBWrm")>; +def: InstRW<[SKLWriteResGroup129], (instregex "VPHADDDrm")>; +def: InstRW<[SKLWriteResGroup129], 
(instregex "VPHADDWrm")>; +def: InstRW<[SKLWriteResGroup129], (instregex "VPHSUBDrm")>; +def: InstRW<[SKLWriteResGroup129], (instregex "VPHSUBWrm")>; + +def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKLWriteResGroup130], (instregex "SHLD(16|32|64)mri8")>; +def: InstRW<[SKLWriteResGroup130], (instregex "SHRD(16|32|64)mri8")>; + +def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { + let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKLWriteResGroup131], (instregex "LAR(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup131], (instregex "LSL(16|32|64)rm")>; + +def SKLWriteResGroup132 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup132], (instregex "AESDECLASTrm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "AESDECrm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "AESENCLASTrm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "AESENCrm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "RCPPSm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "RSQRTPSm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "VAESDECLASTrm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "VAESDECrm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "VAESENCLASTrm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "VAESENCrm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "VRCPPSm")>; +def: InstRW<[SKLWriteResGroup132], (instregex "VRSQRTPSm")>; + +def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup133], (instregex "ADD_F32m")>; +def: InstRW<[SKLWriteResGroup133], (instregex "ADD_F64m")>; +def: InstRW<[SKLWriteResGroup133], (instregex "ILD_F16m")>; +def: 
InstRW<[SKLWriteResGroup133], (instregex "ILD_F32m")>; +def: InstRW<[SKLWriteResGroup133], (instregex "ILD_F64m")>; +def: InstRW<[SKLWriteResGroup133], (instregex "SUBR_F32m")>; +def: InstRW<[SKLWriteResGroup133], (instregex "SUBR_F64m")>; +def: InstRW<[SKLWriteResGroup133], (instregex "SUB_F32m")>; +def: InstRW<[SKLWriteResGroup133], (instregex "SUB_F64m")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPCMPGTQYrm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPERM2F128rm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPERM2I128rm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPERMDYrm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPERMPDYmi")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPERMPSYrm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPERMQYmi")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXBDYrm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXBQYrm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXBWYrm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXDQYrm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXWQYrm")>; +def: InstRW<[SKLWriteResGroup133], (instregex "VPSADBWYrm")>; + +def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup134], (instregex "ADDPDrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "ADDPSrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "ADDSUBPDrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "ADDSUBPSrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "MULPDrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "MULPSrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "SUBPDrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "SUBPSrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VADDPDrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VADDPSrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex 
"VADDSUBPDrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VADDSUBPSrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD132PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD132PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD213PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD213PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD231PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD231PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB132PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB132PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB213PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB213PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB231PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB231PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB132PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB132PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB213PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB213PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB231PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB231PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD132PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD132PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD213PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD213PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD231PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD231PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD132PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD132PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD213PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD213PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex 
"VFNMADD231PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD231PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB132PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB132PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB213PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB213PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB231PDm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB231PSm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VMULPDrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VMULPSrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VSUBPDrm")>; +def: InstRW<[SKLWriteResGroup134], (instregex "VSUBPSrm")>; + +def SKLWriteResGroup135 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup135], (instregex "CMPPDrmi")>; +def: InstRW<[SKLWriteResGroup135], (instregex "CMPPSrmi")>; +def: InstRW<[SKLWriteResGroup135], (instregex "CVTDQ2PSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "CVTPS2DQrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "CVTSS2SDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "CVTTPS2DQrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "MAXPDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "MAXPSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "MINPDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "MINPSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "PHMINPOSUWrm128")>; +def: InstRW<[SKLWriteResGroup135], (instregex "PMADDUBSWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "PMADDWDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "PMULDQrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "PMULHRSWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "PMULHUWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "PMULHWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "PMULLWrm")>; +def: 
InstRW<[SKLWriteResGroup135], (instregex "PMULUDQrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VCMPPDrmi")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VCMPPSrmi")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VCVTDQ2PSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VCVTPH2PSYrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VCVTPS2DQrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VCVTSS2SDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VCVTTPS2DQrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VMAXPDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VMAXPSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VMINPDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VMINPSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPHMINPOSUWrm128")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPMADDUBSWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPMADDWDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPMULDQrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPMULHRSWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPMULHUWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPMULHWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPMULLWrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VPMULUDQrm")>; + +def SKLWriteResGroup136 : SchedWriteRes<[SKLPort0]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SKLWriteResGroup136], (instregex "PCMPISTRIrr")>; +def: InstRW<[SKLWriteResGroup136], (instregex "PCMPISTRM128rr")>; +def: InstRW<[SKLWriteResGroup136], (instregex "VPCMPISTRIrr")>; +def: InstRW<[SKLWriteResGroup136], (instregex "VPCMPISTRM128rr")>; -def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 8; +def SKLWriteResGroup137 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 10; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup160], (instregex "AESIMCrm")>; 
-def: InstRW<[SKLWriteResGroup160], (instregex "VAESIMCrm")>; +def: InstRW<[SKLWriteResGroup137], (instregex "MPSADBWrmi")>; +def: InstRW<[SKLWriteResGroup137], (instregex "VMPSADBWrmi")>; -def SKLWriteResGroup161 : SchedWriteRes<[SKLPort23,SKLPort015]> { - let Latency = 8; +def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { + let Latency = 10; let NumMicroOps = 3; - let ResourceCycles = [1,2]; + let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup161], (instregex "PMULLDrm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "ROUNDPDm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "ROUNDPSm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "ROUNDSDm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "ROUNDSSm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "VPMULLDYrm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "VPMULLDrm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDPDm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDPSm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDSDm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDSSm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDYPDm")>; -def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDYPSm")>; +def: InstRW<[SKLWriteResGroup138], (instregex "MMX_CVTPI2PDirm")>; +def: InstRW<[SKLWriteResGroup138], (instregex "VPTESTYrm")>; -def SKLWriteResGroup165 : SchedWriteRes<[SKLPort5,SKLPort015]> { - let Latency = 9; +def SKLWriteResGroup139 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { + let Latency = 10; let NumMicroOps = 3; - let ResourceCycles = [1,2]; + let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup165], (instregex "DPPDrri")>; -def: InstRW<[SKLWriteResGroup165], (instregex "VDPPDrri")>; +def: InstRW<[SKLWriteResGroup139], (instregex "CVTSD2SSrm")>; +def: InstRW<[SKLWriteResGroup139], (instregex "VCVTSD2SSrm")>; -def SKLWriteResGroup167 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { - let Latency = 9; 
+def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { + let Latency = 10; let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; + let ResourceCycles = [2,1,1]; } -def: InstRW<[SKLWriteResGroup167], (instregex "DPPDrmi")>; -def: InstRW<[SKLWriteResGroup167], (instregex "VDPPDrmi")>; +def: InstRW<[SKLWriteResGroup140], (instregex "VPHADDSWrm256")>; +def: InstRW<[SKLWriteResGroup140], (instregex "VPHSUBSWrm256")>; -def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup141 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [3]; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; } -def: InstRW<[SKLWriteResGroup169], (instregex "PCMPISTRIrr")>; -def: InstRW<[SKLWriteResGroup169], (instregex "PCMPISTRM128rr")>; -def: InstRW<[SKLWriteResGroup169], (instregex "VPCMPISTRIrr")>; -def: InstRW<[SKLWriteResGroup169], (instregex "VPCMPISTRM128rr")>; +def: InstRW<[SKLWriteResGroup141], (instregex "VPHADDDYrm")>; +def: InstRW<[SKLWriteResGroup141], (instregex "VPHADDWYrm")>; +def: InstRW<[SKLWriteResGroup141], (instregex "VPHSUBDYrm")>; +def: InstRW<[SKLWriteResGroup141], (instregex "VPHSUBWYrm")>; -def SKLWriteResGroup170 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> { let Latency = 10; let NumMicroOps = 4; - let ResourceCycles = [3,1]; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup170], (instregex "PCMPISTRIrm")>; -def: InstRW<[SKLWriteResGroup170], (instregex "PCMPISTRM128rm")>; -def: InstRW<[SKLWriteResGroup170], (instregex "VPCMPISTRIrm")>; -def: InstRW<[SKLWriteResGroup170], (instregex "VPCMPISTRM128rm")>; +def: InstRW<[SKLWriteResGroup142], (instregex "MULX32rm")>; -def SKLWriteResGroup171 : SchedWriteRes<[SKLPort05,SKLPort0156]> { +def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 10; - let 
NumMicroOps = 10; - let ResourceCycles = [9,1]; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,1,1,3]; } -def: InstRW<[SKLWriteResGroup171], (instregex "MMX_EMMS")>; +def: InstRW<[SKLWriteResGroup143], (instregex "ADD8mi")>; +def: InstRW<[SKLWriteResGroup143], (instregex "AND8mi")>; +def: InstRW<[SKLWriteResGroup143], (instregex "OR8mi")>; +def: InstRW<[SKLWriteResGroup143], (instregex "SUB8mi")>; +def: InstRW<[SKLWriteResGroup143], (instregex "XCHG(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup143], (instregex "XCHG8rm")>; +def: InstRW<[SKLWriteResGroup143], (instregex "XOR8mi")>; -def SKLWriteResGroup172 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { +def SKLWriteResGroup144 : SchedWriteRes<[SKLPort05,SKLPort0156]> { let Latency = 10; let NumMicroOps = 10; - let ResourceCycles = [1,1,1,5,1,1]; + let ResourceCycles = [9,1]; } -def: InstRW<[SKLWriteResGroup172], (instregex "RCL(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup172], (instregex "RCL8mCL")>; +def: InstRW<[SKLWriteResGroup144], (instregex "MMX_EMMS")>; -def SKLWriteResGroup173 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0]> { let Latency = 11; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup173], (instregex "DIVPSrr")>; -def: InstRW<[SKLWriteResGroup173], (instregex "DIVSSrr")>; -def: InstRW<[SKLWriteResGroup173], (instregex "VDIVPSYrr")>; -def: InstRW<[SKLWriteResGroup173], (instregex "VDIVPSrr")>; -def: InstRW<[SKLWriteResGroup173], (instregex "VDIVSSrr")>; +def: InstRW<[SKLWriteResGroup145], (instregex "DIVPSrr")>; +def: InstRW<[SKLWriteResGroup145], (instregex "DIVSSrr")>; +def: InstRW<[SKLWriteResGroup145], (instregex "VDIVPSYrr")>; +def: InstRW<[SKLWriteResGroup145], (instregex "VDIVPSrr")>; +def: InstRW<[SKLWriteResGroup145], (instregex "VDIVSSrr")>; + +def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} 
+def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F32m")>; +def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F64m")>; +def: InstRW<[SKLWriteResGroup146], (instregex "VRCPPSYm")>; +def: InstRW<[SKLWriteResGroup146], (instregex "VRSQRTPSYm")>; + +def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup147], (instregex "VADDPDYrm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VADDPSYrm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VADDSUBPDYrm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VADDSUBPSYrm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD132PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD132PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD213PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD213PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD231PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD231PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB132PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB132PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB213PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB213PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB231PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB231PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB132PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB132PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB213PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB213PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB231PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB231PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD132PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD132PSYm")>; +def: 
InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD213PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD213PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD231PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD231PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD132PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD132PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD213PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD213PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD231PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD231PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB132PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB132PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB213PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB213PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB231PDYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB231PSYm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VMULPDYrm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VMULPSYrm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VSUBPDYrm")>; +def: InstRW<[SKLWriteResGroup147], (instregex "VSUBPSYrm")>; -def SKLWriteResGroup174 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup148 : SchedWriteRes<[SKLPort23,SKLPort015]> { let Latency = 11; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup174], (instregex "DIVPSrm")>; -def: InstRW<[SKLWriteResGroup174], (instregex "DIVSSrm")>; -def: InstRW<[SKLWriteResGroup174], (instregex "VDIVPSYrm")>; -def: InstRW<[SKLWriteResGroup174], (instregex "VDIVPSrm")>; -def: InstRW<[SKLWriteResGroup174], (instregex "VDIVSSrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VCMPPDYrmi")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VCMPPSYrmi")>; +def: InstRW<[SKLWriteResGroup148], (instregex 
"VCVTDQ2PSYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VCVTPS2DQYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VCVTPS2PDYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VCVTTPS2DQYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VMAXPDYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VMAXPSYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VMINPDYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VMINPSYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VPMADDUBSWYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VPMADDWDYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VPMULDQYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VPMULHRSWYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VPMULHUWYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VPMULHWYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VPMULLWYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VPMULUDQYrm")>; + +def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKLWriteResGroup149], (instregex "FICOM16m")>; +def: InstRW<[SKLWriteResGroup149], (instregex "FICOM32m")>; +def: InstRW<[SKLWriteResGroup149], (instregex "FICOMP16m")>; +def: InstRW<[SKLWriteResGroup149], (instregex "FICOMP32m")>; +def: InstRW<[SKLWriteResGroup149], (instregex "VMPSADBWYrmi")>; + +def SKLWriteResGroup150 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup150], (instregex "CVTDQ2PDrm")>; +def: InstRW<[SKLWriteResGroup150], (instregex "VCVTDQ2PDrm")>; -def SKLWriteResGroup175 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup151 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup151], (instregex 
"CVTSD2SI64rm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "CVTSD2SIrm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "CVTSS2SI64rm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "CVTSS2SIrm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "CVTTSD2SI64rm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "CVTTSD2SIrm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "CVTTSS2SIrm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "VCVTSD2SI64rm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "VCVTSD2SIrm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "VCVTSS2SI64rm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "VCVTSS2SIrm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "VCVTTSD2SI64rm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "VCVTTSD2SIrm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "VCVTTSS2SI64rm")>; +def: InstRW<[SKLWriteResGroup151], (instregex "VCVTTSS2SIrm")>; + +def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2DQrm")>; +def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm")>; +def: InstRW<[SKLWriteResGroup152], (instregex "CVTTPD2DQrm")>; +def: InstRW<[SKLWriteResGroup152], (instregex "MMX_CVTPD2PIirm")>; +def: InstRW<[SKLWriteResGroup152], (instregex "MMX_CVTTPD2PIirm")>; + +def SKLWriteResGroup153 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 11; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,2,1]; +} +def: InstRW<[SKLWriteResGroup153], (instregex "SHLD(16|32|64)mrCL")>; +def: InstRW<[SKLWriteResGroup153], (instregex "SHRD(16|32|64)mrCL")>; + +def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 11; let NumMicroOps = 7; let ResourceCycles = [2,3,2]; } -def: InstRW<[SKLWriteResGroup175], (instregex "RCL(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup175], (instregex 
"RCR(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup154], (instregex "RCL(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup154], (instregex "RCR(16|32|64)rCL")>; -def SKLWriteResGroup176 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> { +def SKLWriteResGroup155 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 11; let NumMicroOps = 9; let ResourceCycles = [1,5,1,2]; } -def: InstRW<[SKLWriteResGroup176], (instregex "RCL8rCL")>; +def: InstRW<[SKLWriteResGroup155], (instregex "RCL8rCL")>; -def SKLWriteResGroup177 : SchedWriteRes<[SKLPort06,SKLPort0156]> { +def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 11; let NumMicroOps = 11; let ResourceCycles = [2,9]; } -def: InstRW<[SKLWriteResGroup177], (instregex "LOOPE")>; -def: InstRW<[SKLWriteResGroup177], (instregex "LOOPNE")>; - -def SKLWriteResGroup178 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { - let Latency = 11; - let NumMicroOps = 14; - let ResourceCycles = [1,1,1,4,2,5]; -} -def: InstRW<[SKLWriteResGroup178], (instregex "CMPXCHG8B")>; +def: InstRW<[SKLWriteResGroup156], (instregex "LOOPE")>; +def: InstRW<[SKLWriteResGroup156], (instregex "LOOPNE")>; -def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup157 : SchedWriteRes<[SKLPort0]> { let Latency = 12; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup179], (instregex "VSQRTPSYr")>; -def: InstRW<[SKLWriteResGroup179], (instregex "VSQRTPSr")>; -def: InstRW<[SKLWriteResGroup179], (instregex "VSQRTSSr")>; +def: InstRW<[SKLWriteResGroup157], (instregex "VSQRTPSYr")>; +def: InstRW<[SKLWriteResGroup157], (instregex "VSQRTPSr")>; +def: InstRW<[SKLWriteResGroup157], (instregex "VSQRTSSr")>; -def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup158 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 12; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: 
InstRW<[SKLWriteResGroup180], (instregex "VSQRTPSYm")>; -def: InstRW<[SKLWriteResGroup180], (instregex "VSQRTPSm")>; -def: InstRW<[SKLWriteResGroup180], (instregex "VSQRTSSm")>; +def: InstRW<[SKLWriteResGroup158], (instregex "PCLMULQDQrm")>; +def: InstRW<[SKLWriteResGroup158], (instregex "VPCLMULQDQrm")>; -def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup159 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKLWriteResGroup159], (instregex "HADDPDrm")>; +def: InstRW<[SKLWriteResGroup159], (instregex "HADDPSrm")>; +def: InstRW<[SKLWriteResGroup159], (instregex "HSUBPDrm")>; +def: InstRW<[SKLWriteResGroup159], (instregex "HSUBPSrm")>; +def: InstRW<[SKLWriteResGroup159], (instregex "VHADDPDrm")>; +def: InstRW<[SKLWriteResGroup159], (instregex "VHADDPSrm")>; +def: InstRW<[SKLWriteResGroup159], (instregex "VHSUBPDrm")>; +def: InstRW<[SKLWriteResGroup159], (instregex "VHSUBPSrm")>; + +def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKLWriteResGroup160], (instregex "CVTTSS2SI64rm")>; + +def SKLWriteResGroup161 : SchedWriteRes<[SKLPort0]> { let Latency = 13; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup181], (instregex "SQRTPSr")>; -def: InstRW<[SKLWriteResGroup181], (instregex "SQRTSSr")>; +def: InstRW<[SKLWriteResGroup161], (instregex "SQRTPSr")>; +def: InstRW<[SKLWriteResGroup161], (instregex "SQRTSSr")>; -def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup162 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 13; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup182], (instregex "SQRTPSm")>; -def: InstRW<[SKLWriteResGroup182], (instregex "SQRTSSm")>; +def: 
InstRW<[SKLWriteResGroup162], (instregex "ADD_FI16m")>; +def: InstRW<[SKLWriteResGroup162], (instregex "ADD_FI32m")>; +def: InstRW<[SKLWriteResGroup162], (instregex "SUBR_FI16m")>; +def: InstRW<[SKLWriteResGroup162], (instregex "SUBR_FI32m")>; +def: InstRW<[SKLWriteResGroup162], (instregex "SUB_FI16m")>; +def: InstRW<[SKLWriteResGroup162], (instregex "SUB_FI32m")>; -def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 13; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup187], (instregex "DPPSrri")>; -def: InstRW<[SKLWriteResGroup187], (instregex "VDPPSYrri")>; -def: InstRW<[SKLWriteResGroup187], (instregex "VDPPSrri")>; +def: InstRW<[SKLWriteResGroup163], (instregex "VCVTDQ2PDYrm")>; -def SKLWriteResGroup188 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { +def SKLWriteResGroup164 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 13; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup188], (instregex "DPPSrmi")>; -def: InstRW<[SKLWriteResGroup188], (instregex "VDPPSYrmi")>; -def: InstRW<[SKLWriteResGroup188], (instregex "VDPPSrmi")>; +def: InstRW<[SKLWriteResGroup164], (instregex "DPPSrri")>; +def: InstRW<[SKLWriteResGroup164], (instregex "VDPPSYrri")>; +def: InstRW<[SKLWriteResGroup164], (instregex "VDPPSrri")>; -def SKLWriteResGroup189 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { +def SKLWriteResGroup165 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { let Latency = 13; - let NumMicroOps = 11; - let ResourceCycles = [2,1,1,4,1,2]; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; } -def: InstRW<[SKLWriteResGroup189], (instregex "RCR(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup189], (instregex "RCR8mCL")>; +def: 
InstRW<[SKLWriteResGroup165], (instregex "VHADDPDYrm")>; +def: InstRW<[SKLWriteResGroup165], (instregex "VHADDPSYrm")>; +def: InstRW<[SKLWriteResGroup165], (instregex "VHSUBPDYrm")>; +def: InstRW<[SKLWriteResGroup165], (instregex "VHSUBPSYrm")>; -def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0]> { let Latency = 14; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup190], (instregex "DIVPDrr")>; -def: InstRW<[SKLWriteResGroup190], (instregex "DIVSDrr")>; -def: InstRW<[SKLWriteResGroup190], (instregex "VDIVPDYrr")>; -def: InstRW<[SKLWriteResGroup190], (instregex "VDIVPDrr")>; -def: InstRW<[SKLWriteResGroup190], (instregex "VDIVSDrr")>; +def: InstRW<[SKLWriteResGroup166], (instregex "DIVPDrr")>; +def: InstRW<[SKLWriteResGroup166], (instregex "DIVSDrr")>; +def: InstRW<[SKLWriteResGroup166], (instregex "VDIVPDYrr")>; +def: InstRW<[SKLWriteResGroup166], (instregex "VDIVPDrr")>; +def: InstRW<[SKLWriteResGroup166], (instregex "VDIVSDrr")>; -def SKLWriteResGroup191 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup167 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 14; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKLWriteResGroup167], (instregex "AESIMCrm")>; +def: InstRW<[SKLWriteResGroup167], (instregex "VAESIMCrm")>; + +def SKLWriteResGroup168 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup168], (instregex "PMULLDrm")>; +def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDPDm")>; +def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDPSm")>; +def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDSDm")>; +def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDSSm")>; +def: InstRW<[SKLWriteResGroup168], (instregex "VPMULLDrm")>; +def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDPDm")>; +def: 
InstRW<[SKLWriteResGroup168], (instregex "VROUNDPSm")>; +def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDSDm")>; +def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDSSm")>; + +def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup191], (instregex "DIVPDrm")>; -def: InstRW<[SKLWriteResGroup191], (instregex "DIVSDrm")>; -def: InstRW<[SKLWriteResGroup191], (instregex "VDIVPDYrm")>; -def: InstRW<[SKLWriteResGroup191], (instregex "VDIVPDrm")>; -def: InstRW<[SKLWriteResGroup191], (instregex "VDIVSDrm")>; +def: InstRW<[SKLWriteResGroup169], (instregex "MUL_FI16m")>; +def: InstRW<[SKLWriteResGroup169], (instregex "MUL_FI32m")>; -def SKLWriteResGroup192 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> { +def SKLWriteResGroup170 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 14; let NumMicroOps = 10; let ResourceCycles = [2,4,1,3]; } -def: InstRW<[SKLWriteResGroup192], (instregex "RCR8rCL")>; +def: InstRW<[SKLWriteResGroup170], (instregex "RCR8rCL")>; -def SKLWriteResGroup193 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup171 : SchedWriteRes<[SKLPort0]> { let Latency = 15; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup193], (instregex "DIVR_FPrST0")>; -def: InstRW<[SKLWriteResGroup193], (instregex "DIVR_FST0r")>; -def: InstRW<[SKLWriteResGroup193], (instregex "DIVR_FrST0")>; +def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_FPrST0")>; +def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_FST0r")>; +def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_FrST0")>; + +def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 15; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup172], (instregex "VPMULLDYrm")>; +def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPDm")>; +def: InstRW<[SKLWriteResGroup172], 
(instregex "VROUNDYPSm")>; + +def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { + let Latency = 15; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SKLWriteResGroup173], (instregex "DPPDrmi")>; +def: InstRW<[SKLWriteResGroup173], (instregex "VDPPDrmi")>; -def SKLWriteResGroup194 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 15; + let NumMicroOps = 10; + let ResourceCycles = [1,1,1,5,1,1]; +} +def: InstRW<[SKLWriteResGroup174], (instregex "RCL(16|32|64)mCL")>; +def: InstRW<[SKLWriteResGroup174], (instregex "RCL8mCL")>; + +def SKLWriteResGroup175 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 16; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup194], (instregex "DIV_F32m")>; -def: InstRW<[SKLWriteResGroup194], (instregex "DIV_F64m")>; +def: InstRW<[SKLWriteResGroup175], (instregex "DIVSSrm")>; +def: InstRW<[SKLWriteResGroup175], (instregex "VDIVSSrm")>; -def SKLWriteResGroup195 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 15; - let NumMicroOps = 8; - let ResourceCycles = [1,1,1,1,1,1,2]; +def SKLWriteResGroup176 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 16; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; +} +def: InstRW<[SKLWriteResGroup176], (instregex "PCMPISTRIrm")>; +def: InstRW<[SKLWriteResGroup176], (instregex "PCMPISTRM128rm")>; +def: InstRW<[SKLWriteResGroup176], (instregex "VPCMPISTRIrm")>; +def: InstRW<[SKLWriteResGroup176], (instregex "VPCMPISTRM128rm")>; + +def SKLWriteResGroup177 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { + let Latency = 16; + let NumMicroOps = 14; + let ResourceCycles = [1,1,1,4,2,5]; } -def: InstRW<[SKLWriteResGroup195], (instregex "INSB")>; -def: InstRW<[SKLWriteResGroup195], (instregex "INSL")>; -def: 
InstRW<[SKLWriteResGroup195], (instregex "INSW")>; +def: InstRW<[SKLWriteResGroup177], (instregex "CMPXCHG8B")>; -def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0156]> { +def SKLWriteResGroup178 : SchedWriteRes<[SKLPort0156]> { let Latency = 16; let NumMicroOps = 16; let ResourceCycles = [16]; } -def: InstRW<[SKLWriteResGroup196], (instregex "VZEROALL")>; +def: InstRW<[SKLWriteResGroup178], (instregex "VZEROALL")>; + +def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 17; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup179], (instregex "DIVPSrm")>; +def: InstRW<[SKLWriteResGroup179], (instregex "VDIVPSrm")>; +def: InstRW<[SKLWriteResGroup179], (instregex "VSQRTSSm")>; -def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { +def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { let Latency = 17; let NumMicroOps = 15; let ResourceCycles = [2,1,2,4,2,4]; } -def: InstRW<[SKLWriteResGroup197], (instregex "XCH_F")>; +def: InstRW<[SKLWriteResGroup180], (instregex "XCH_F")>; -def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0]> { let Latency = 18; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup198], (instregex "VSQRTPDYr")>; -def: InstRW<[SKLWriteResGroup198], (instregex "VSQRTPDr")>; -def: InstRW<[SKLWriteResGroup198], (instregex "VSQRTSDr")>; +def: InstRW<[SKLWriteResGroup181], (instregex "VSQRTPDYr")>; +def: InstRW<[SKLWriteResGroup181], (instregex "VSQRTPDr")>; +def: InstRW<[SKLWriteResGroup181], (instregex "VSQRTSDr")>; -def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 18; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTPDYm")>; -def: InstRW<[SKLWriteResGroup199], (instregex 
"VSQRTPDm")>; -def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTSDm")>; - -def SKLWriteResGroup200 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { - let Latency = 18; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup200], (instregex "DIV_FI16m")>; -def: InstRW<[SKLWriteResGroup200], (instregex "DIV_FI32m")>; +def: InstRW<[SKLWriteResGroup182], (instregex "SQRTSSm")>; +def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>; +def: InstRW<[SKLWriteResGroup182], (instregex "VSQRTPSm")>; -def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort0156]> { +def SKLWriteResGroup183 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort0156]> { let Latency = 18; let NumMicroOps = 8; let ResourceCycles = [4,3,1]; } -def: InstRW<[SKLWriteResGroup201], (instregex "PCMPESTRIrr")>; -def: InstRW<[SKLWriteResGroup201], (instregex "VPCMPESTRIrr")>; +def: InstRW<[SKLWriteResGroup183], (instregex "PCMPESTRIrr")>; +def: InstRW<[SKLWriteResGroup183], (instregex "VPCMPESTRIrr")>; -def SKLWriteResGroup202 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 18; let NumMicroOps = 8; let ResourceCycles = [1,1,1,5]; } -def: InstRW<[SKLWriteResGroup202], (instregex "CPUID")>; -def: InstRW<[SKLWriteResGroup202], (instregex "RDTSC")>; +def: InstRW<[SKLWriteResGroup184], (instregex "CPUID")>; +def: InstRW<[SKLWriteResGroup184], (instregex "RDTSC")>; -def SKLWriteResGroup203 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> { +def SKLWriteResGroup185 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 18; - let NumMicroOps = 9; - let ResourceCycles = [4,3,1,1]; -} -def: InstRW<[SKLWriteResGroup203], (instregex "PCMPESTRIrm")>; -def: InstRW<[SKLWriteResGroup203], (instregex "VPCMPESTRIrm")>; - -def SKLWriteResGroup204 : 
SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 18; - let NumMicroOps = 19; - let ResourceCycles = [2,1,4,1,1,4,6]; + let NumMicroOps = 11; + let ResourceCycles = [2,1,1,4,1,2]; } -def: InstRW<[SKLWriteResGroup204], (instregex "CMPXCHG16B")>; +def: InstRW<[SKLWriteResGroup185], (instregex "RCR(16|32|64)mCL")>; +def: InstRW<[SKLWriteResGroup185], (instregex "RCR8mCL")>; -def SKLWriteResGroup205 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015,SKLPort0156]> { +def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 19; - let NumMicroOps = 9; - let ResourceCycles = [4,3,1,1]; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup205], (instregex "PCMPESTRM128rr")>; -def: InstRW<[SKLWriteResGroup205], (instregex "VPCMPESTRM128rr")>; +def: InstRW<[SKLWriteResGroup186], (instregex "DIVSDrm")>; +def: InstRW<[SKLWriteResGroup186], (instregex "SQRTPSm")>; +def: InstRW<[SKLWriteResGroup186], (instregex "VDIVSDrm")>; +def: InstRW<[SKLWriteResGroup186], (instregex "VSQRTPSYm")>; -def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015,SKLPort0156]> { +def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { let Latency = 19; - let NumMicroOps = 10; - let ResourceCycles = [4,3,1,1,1]; + let NumMicroOps = 5; + let ResourceCycles = [1,1,3]; } -def: InstRW<[SKLWriteResGroup206], (instregex "PCMPESTRM128rm")>; -def: InstRW<[SKLWriteResGroup206], (instregex "VPCMPESTRM128rm")>; +def: InstRW<[SKLWriteResGroup187], (instregex "DPPSrmi")>; +def: InstRW<[SKLWriteResGroup187], (instregex "VDPPSrmi")>; -def SKLWriteResGroup207 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015]> { +def SKLWriteResGroup188 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015,SKLPort0156]> { let Latency = 19; - let NumMicroOps = 11; - let ResourceCycles = [3,6,1,1]; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; } -def: InstRW<[SKLWriteResGroup207], 
(instregex "AESKEYGENASSIST128rm")>; -def: InstRW<[SKLWriteResGroup207], (instregex "VAESKEYGENASSIST128rm")>; +def: InstRW<[SKLWriteResGroup188], (instregex "PCMPESTRM128rr")>; +def: InstRW<[SKLWriteResGroup188], (instregex "VPCMPESTRM128rr")>; -def SKLWriteResGroup208 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> { let Latency = 20; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup208], (instregex "DIV_FPrST0")>; -def: InstRW<[SKLWriteResGroup208], (instregex "DIV_FST0r")>; -def: InstRW<[SKLWriteResGroup208], (instregex "DIV_FrST0")>; -def: InstRW<[SKLWriteResGroup208], (instregex "SQRTPDr")>; -def: InstRW<[SKLWriteResGroup208], (instregex "SQRTSDr")>; +def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FPrST0")>; +def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FST0r")>; +def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FrST0")>; +def: InstRW<[SKLWriteResGroup189], (instregex "SQRTPDr")>; +def: InstRW<[SKLWriteResGroup189], (instregex "SQRTSDr")>; -def SKLWriteResGroup209 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 20; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup209], (instregex "DIVR_F32m")>; -def: InstRW<[SKLWriteResGroup209], (instregex "DIVR_F64m")>; -def: InstRW<[SKLWriteResGroup209], (instregex "SQRTPDm")>; -def: InstRW<[SKLWriteResGroup209], (instregex "SQRTSDm")>; +def: InstRW<[SKLWriteResGroup190], (instregex "DIVPDrm")>; +def: InstRW<[SKLWriteResGroup190], (instregex "VDIVPDrm")>; + +def SKLWriteResGroup191 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { + let Latency = 20; + let NumMicroOps = 5; + let ResourceCycles = [1,1,3]; +} +def: InstRW<[SKLWriteResGroup191], (instregex "VDPPSYrmi")>; + +def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 20; + let NumMicroOps = 8; + let 
ResourceCycles = [1,1,1,1,1,1,2]; +} +def: InstRW<[SKLWriteResGroup192], (instregex "INSB")>; +def: InstRW<[SKLWriteResGroup192], (instregex "INSL")>; +def: InstRW<[SKLWriteResGroup192], (instregex "INSW")>; -def SKLWriteResGroup210 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort0156]> { +def SKLWriteResGroup193 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort0156]> { let Latency = 20; let NumMicroOps = 10; let ResourceCycles = [1,2,7]; } -def: InstRW<[SKLWriteResGroup210], (instregex "MWAITrr")>; +def: InstRW<[SKLWriteResGroup193], (instregex "MWAITrr")>; -def SKLWriteResGroup211 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015]> { +def SKLWriteResGroup194 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015]> { let Latency = 20; let NumMicroOps = 11; let ResourceCycles = [3,6,2]; } -def: InstRW<[SKLWriteResGroup211], (instregex "AESKEYGENASSIST128rr")>; -def: InstRW<[SKLWriteResGroup211], (instregex "VAESKEYGENASSIST128rr")>; +def: InstRW<[SKLWriteResGroup194], (instregex "AESKEYGENASSIST128rr")>; +def: InstRW<[SKLWriteResGroup194], (instregex "VAESKEYGENASSIST128rr")>; -def SKLWriteResGroup212 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { - let Latency = 17; +def SKLWriteResGroup195 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 21; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup195], (instregex "VDIVPDYrm")>; + +def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 22; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F32m")>; +def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F64m")>; + +def SKLWriteResGroup196_1 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { + let Latency = 22; let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERDPSrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERDPDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex 
"VGATHERQPDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERQPSrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERDDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERDQrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERQDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERQQrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERDDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERQDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERDQrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERQQrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERDPSrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERQPSrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERDPDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERQPDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPSrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPSrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDQrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQQrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDQrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQQrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPSrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPSrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPDrm")>; -def SKLWriteResGroup213 : SchedWriteRes<[SKLPort0, SKLPort23, 
SKLPort5, SKLPort015]> { - let Latency = 20; +def SKLWriteResGroup196_2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { + let Latency = 25; let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERDPSYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERQPDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERQPSYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERDDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERDQYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERQDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERQQYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERDDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERQDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERDQYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERQQYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERDPSYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERQPSYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERDPDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPSYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERQPDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERQPSYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDQYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQQYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDQYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQQYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPSYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex 
"VGATHERQPSYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPDYrm")>; + +def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 23; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup197], (instregex "VSQRTSDm")>; -def SKLWriteResGroup215 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { +def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 23; + let NumMicroOps = 19; + let ResourceCycles = [2,1,4,1,1,4,6]; +} +def: InstRW<[SKLWriteResGroup198], (instregex "CMPXCHG16B")>; + +def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 24; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTPDm")>; + +def SKLWriteResGroup200 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> { + let Latency = 24; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; +} +def: InstRW<[SKLWriteResGroup200], (instregex "PCMPESTRIrm")>; +def: InstRW<[SKLWriteResGroup200], (instregex "VPCMPESTRIrm")>; + +def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 25; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup201], (instregex "SQRTSDm")>; +def: InstRW<[SKLWriteResGroup201], (instregex "VSQRTPDYm")>; + +def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { + let Latency = 25; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup215], (instregex "DIVR_FI16m")>; -def: InstRW<[SKLWriteResGroup215], (instregex "DIVR_FI32m")>; +def: InstRW<[SKLWriteResGroup202], (instregex "DIV_FI16m")>; +def: InstRW<[SKLWriteResGroup202], (instregex "DIV_FI32m")>; -def SKLWriteResGroup217 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> { - let Latency = 23; +def SKLWriteResGroup203 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015,SKLPort0156]> { + 
let Latency = 25; + let NumMicroOps = 10; + let ResourceCycles = [4,3,1,1,1]; +} +def: InstRW<[SKLWriteResGroup203], (instregex "PCMPESTRM128rm")>; +def: InstRW<[SKLWriteResGroup203], (instregex "VPCMPESTRM128rm")>; + +def SKLWriteResGroup204 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015]> { + let Latency = 25; + let NumMicroOps = 11; + let ResourceCycles = [3,6,1,1]; +} +def: InstRW<[SKLWriteResGroup204], (instregex "AESKEYGENASSIST128rm")>; +def: InstRW<[SKLWriteResGroup204], (instregex "VAESKEYGENASSIST128rm")>; + +def SKLWriteResGroup205 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 26; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup205], (instregex "SQRTPDm")>; + +def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 27; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup206], (instregex "DIVR_F32m")>; +def: InstRW<[SKLWriteResGroup206], (instregex "DIVR_F64m")>; + +def SKLWriteResGroup207 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> { + let Latency = 28; let NumMicroOps = 8; let ResourceCycles = [2,4,1,1]; } -def: InstRW<[SKLWriteResGroup217], (instregex "IDIV(16|32|64)m")>; -def: InstRW<[SKLWriteResGroup217], (instregex "IDIV8m")>; +def: InstRW<[SKLWriteResGroup207], (instregex "IDIV(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup207], (instregex "IDIV8m")>; -def SKLWriteResGroup222 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup208 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 30; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup208], (instregex "DIVR_FI16m")>; +def: InstRW<[SKLWriteResGroup208], (instregex "DIVR_FI32m")>; + +def SKLWriteResGroup209 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort06,SKLPort0156]> { + let Latency = 35; let NumMicroOps = 23; let ResourceCycles = [1,5,3,4,10]; } -def: 
InstRW<[SKLWriteResGroup222], (instregex "IN32ri")>; -def: InstRW<[SKLWriteResGroup222], (instregex "IN32rr")>; -def: InstRW<[SKLWriteResGroup222], (instregex "IN8ri")>; -def: InstRW<[SKLWriteResGroup222], (instregex "IN8rr")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN32ri")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN32rr")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN8ri")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN8rr")>; -def SKLWriteResGroup223 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 30; +def SKLWriteResGroup210 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 35; let NumMicroOps = 23; let ResourceCycles = [1,5,2,1,4,10]; } -def: InstRW<[SKLWriteResGroup223], (instregex "OUT32ir")>; -def: InstRW<[SKLWriteResGroup223], (instregex "OUT32rr")>; -def: InstRW<[SKLWriteResGroup223], (instregex "OUT8ir")>; -def: InstRW<[SKLWriteResGroup223], (instregex "OUT8rr")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT32ir")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT32rr")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT8ir")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT8rr")>; -def SKLWriteResGroup224 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { - let Latency = 32; +def SKLWriteResGroup211 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { + let Latency = 37; let NumMicroOps = 31; let ResourceCycles = [1,8,1,21]; } -def: InstRW<[SKLWriteResGroup224], (instregex "XRSTOR(64?)")>; +def: InstRW<[SKLWriteResGroup211], (instregex "XRSTOR(64?)")>; -def SKLWriteResGroup225 : SchedWriteRes<[SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort15,SKLPort0156]> { - let Latency = 35; +def SKLWriteResGroup212 : SchedWriteRes<[SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort15,SKLPort0156]> { + let Latency = 40; let NumMicroOps = 18; let ResourceCycles = 
[1,1,2,3,1,1,1,8]; } -def: InstRW<[SKLWriteResGroup225], (instregex "VMCLEARm")>; +def: InstRW<[SKLWriteResGroup212], (instregex "VMCLEARm")>; -def SKLWriteResGroup226 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 36; +def SKLWriteResGroup213 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 41; let NumMicroOps = 39; let ResourceCycles = [1,10,1,1,26]; } -def: InstRW<[SKLWriteResGroup226], (instregex "XSAVE64")>; +def: InstRW<[SKLWriteResGroup213], (instregex "XSAVE64")>; -def SKLWriteResGroup231 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 37; +def SKLWriteResGroup214 : SchedWriteRes<[SKLPort5,SKLPort0156]> { + let Latency = 42; + let NumMicroOps = 22; + let ResourceCycles = [2,20]; +} +def: InstRW<[SKLWriteResGroup214], (instregex "RDTSCP")>; + +def SKLWriteResGroup215 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 42; let NumMicroOps = 40; let ResourceCycles = [1,11,1,1,26]; } -def: InstRW<[SKLWriteResGroup231], (instregex "XSAVE")>; +def: InstRW<[SKLWriteResGroup215], (instregex "XSAVE")>; -def SKLWriteResGroup232 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 41; +def SKLWriteResGroup216 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 46; let NumMicroOps = 44; let ResourceCycles = [1,11,1,1,30]; } -def: InstRW<[SKLWriteResGroup232], (instregex "XSAVEOPT")>; - -def SKLWriteResGroup233 : SchedWriteRes<[SKLPort5,SKLPort0156]> { - let Latency = 42; - let NumMicroOps = 22; - let ResourceCycles = [2,20]; -} -def: InstRW<[SKLWriteResGroup233], (instregex "RDTSCP")>; +def: InstRW<[SKLWriteResGroup216], (instregex "XSAVEOPT")>; -def SKLWriteResGroup234 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05,SKLPort06,SKLPort0156]> { - let Latency = 57; +def SKLWriteResGroup217 : 
SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05,SKLPort06,SKLPort0156]> { + let Latency = 62; let NumMicroOps = 64; let ResourceCycles = [2,8,5,10,39]; } -def: InstRW<[SKLWriteResGroup234], (instregex "FLDENVm")>; -def: InstRW<[SKLWriteResGroup234], (instregex "FLDENVm")>; +def: InstRW<[SKLWriteResGroup217], (instregex "FLDENVm")>; +def: InstRW<[SKLWriteResGroup217], (instregex "FLDENVm")>; -def SKLWriteResGroup235 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { - let Latency = 58; +def SKLWriteResGroup218 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { + let Latency = 63; let NumMicroOps = 88; let ResourceCycles = [4,4,31,1,2,1,45]; } -def: InstRW<[SKLWriteResGroup235], (instregex "FXRSTOR64")>; +def: InstRW<[SKLWriteResGroup218], (instregex "FXRSTOR64")>; -def SKLWriteResGroup236 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { - let Latency = 58; +def SKLWriteResGroup219 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { + let Latency = 63; let NumMicroOps = 90; let ResourceCycles = [4,2,33,1,2,1,47]; } -def: InstRW<[SKLWriteResGroup236], (instregex "FXRSTOR")>; +def: InstRW<[SKLWriteResGroup219], (instregex "FXRSTOR")>; -def SKLWriteResGroup239 : SchedWriteRes<[SKLPort5,SKLPort05,SKLPort0156]> { +def SKLWriteResGroup220 : SchedWriteRes<[SKLPort5,SKLPort05,SKLPort0156]> { let Latency = 75; let NumMicroOps = 15; let ResourceCycles = [6,3,6]; } -def: InstRW<[SKLWriteResGroup239], (instregex "FNINIT")>; +def: InstRW<[SKLWriteResGroup220], (instregex "FNINIT")>; -def SKLWriteResGroup240 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { +def SKLWriteResGroup221 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { let Latency = 76; let NumMicroOps = 32; let ResourceCycles = [7,2,8,3,1,11]; } -def: InstRW<[SKLWriteResGroup240], (instregex 
"DIV(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup221], (instregex "DIV(16|32|64)r")>; -def SKLWriteResGroup241 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup222 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 102; let NumMicroOps = 66; let ResourceCycles = [4,2,4,8,14,34]; } -def: InstRW<[SKLWriteResGroup241], (instregex "IDIV(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup222], (instregex "IDIV(16|32|64)r")>; -def SKLWriteResGroup242 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 105; +def SKLWriteResGroup223 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 106; let NumMicroOps = 100; let ResourceCycles = [9,1,11,16,1,11,21,30]; } -def: InstRW<[SKLWriteResGroup242], (instregex "FSTENVm")>; -def: InstRW<[SKLWriteResGroup242], (instregex "FSTENVm")>; +def: InstRW<[SKLWriteResGroup223], (instregex "FSTENVm")>; +def: InstRW<[SKLWriteResGroup223], (instregex "FSTENVm")>; } // SchedModel diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86VZeroUpper.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86VZeroUpper.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Target/X86/X86VZeroUpper.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Target/X86/X86VZeroUpper.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -132,9 +132,8 @@ } static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) { - for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(), - E = MRI.livein_end(); I != E; ++I) - if (isYmmOrZmmReg(I->first)) + for (std::pair LI : MRI.liveins()) + if (isYmmOrZmmReg(LI.first)) return true; return false; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/InstCombine/InstCombineCompares.cpp 
llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/InstCombine/InstCombineCompares.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/InstCombine/InstCombineCompares.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/InstCombine/InstCombineCompares.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -1318,6 +1318,24 @@ return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } +// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) +Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { + CmpInst::Predicate Pred = Cmp.getPredicate(); + Value *X = Cmp.getOperand(0); + + if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) { + Value *A, *B; + SelectPatternResult SPR = matchSelectPattern(X, A, B); + if (SPR.Flavor == SPF_SMIN) { + if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) + return new ICmpInst(Pred, B, Cmp.getOperand(1)); + if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) + return new ICmpInst(Pred, A, Cmp.getOperand(1)); + } + } + return nullptr; +} + // Fold icmp Pred X, C. Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { CmpInst::Predicate Pred = Cmp.getPredicate(); @@ -1349,17 +1367,6 @@ return Res; } - // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) - if (C->isNullValue() && Pred == ICmpInst::ICMP_SGT) { - SelectPatternResult SPR = matchSelectPattern(X, A, B); - if (SPR.Flavor == SPF_SMIN) { - if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) - return new ICmpInst(Pred, B, Cmp.getOperand(1)); - if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) - return new ICmpInst(Pred, A, Cmp.getOperand(1)); - } - } - // FIXME: Use m_APInt to allow folds for splat constants. ConstantInt *CI = dyn_cast(Cmp.getOperand(1)); if (!CI) @@ -2062,9 +2069,8 @@ // If the bits shifted out are known zero, compare the unshifted value: // (X & 4) >> 1 == 2 --> (X & 4) == 4. 
- Constant *ShiftedCmpRHS = ConstantInt::get(ShrTy, C << ShAmtVal); if (Shr->isExact()) - return new ICmpInst(Pred, X, ShiftedCmpRHS); + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal)); if (Shr->hasOneUse()) { // Canonicalize the shift into an 'and': @@ -2072,7 +2078,7 @@ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); Constant *Mask = ConstantInt::get(ShrTy, Val); Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); - return new ICmpInst(Pred, And, ShiftedCmpRHS); + return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal)); } return nullptr; @@ -4462,6 +4468,10 @@ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) return nullptr; + // Do this after checking for min/max to prevent infinite looping. + if (Instruction *Res = foldICmpWithZero(I)) + return Res; + // FIXME: We only do this after checking for min/max to prevent infinite // looping caused by a reverse canonicalization of these patterns for min/max. // FIXME: The organization of folds is a mess. 
These would naturally go into diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/InstCombine/InstCombineInternal.h llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/InstCombine/InstCombineInternal.h --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/InstCombine/InstCombineInternal.h 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/InstCombine/InstCombineInternal.h 2017-10-17 14:41:13.000000000 +0000 @@ -700,6 +700,7 @@ Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp); Instruction *foldICmpBinOp(ICmpInst &Cmp); Instruction *foldICmpEquality(ICmpInst &Cmp); + Instruction *foldICmpWithZero(ICmpInst &Cmp); Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select, ConstantInt *C); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/ObjCARC/ObjCARCOpts.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/ObjCARC/ObjCARCOpts.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/ObjCARC/ObjCARCOpts.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/ObjCARC/ObjCARCOpts.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -808,9 +808,14 @@ // If Arg is a PHI, and one or more incoming values to the // PHI are null, and the call is control-equivalent to the PHI, and there - // are no relevant side effects between the PHI and the call, the call - // could be pushed up to just those paths with non-null incoming values. - // For now, don't bother splitting critical edges for this. + // are no relevant side effects between the PHI and the call, and the call + // is not a release that doesn't have the clang.imprecise_release tag, the + // call could be pushed up to just those paths with non-null incoming + // values. For now, don't bother splitting critical edges for this. 
+ if (Class == ARCInstKind::Release && + !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease))) + continue; + SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(Inst, Arg)); do { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/GVN.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/GVN.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/GVN.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/GVN.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -38,7 +38,6 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -1049,32 +1048,7 @@ // backwards through predecessors if needed. BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI); - // Check that there is no implicit control flow instructions above our load in - // its block. If there is an instruction that doesn't always pass the - // execution to the following instruction, then moving through it may become - // invalid. For example: - // - // int arr[LEN]; - // int index = ???; - // ... - // guard(0 <= index && index < LEN); - // use(arr[index]); - // - // It is illegal to move the array access to any point above the guard, - // because if the index is out of bounds we should deoptimize rather than - // access the array. - // Check that there is no guard in this block above our intruction. 
- if (!IsSafeToSpeculativelyExecute) { - auto It = FirstImplicitControlFlowInsts.find(TmpBB); - if (It != FirstImplicitControlFlowInsts.end()) { - assert(It->second->getParent() == TmpBB && - "Implicit control flow map broken?"); - if (OI->dominates(It->second, LI)) - return false; - } - } while (TmpBB->getSinglePredecessor()) { TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. @@ -1089,11 +1063,6 @@ // which it was not previously executed. if (TmpBB->getTerminator()->getNumSuccessors() != 1) return false; - - // Check that there is no implicit control flow in a block above. - if (!IsSafeToSpeculativelyExecute && - FirstImplicitControlFlowInsts.count(TmpBB)) - return false; } assert(TmpBB); @@ -2014,8 +1983,6 @@ TLI = &RunTLI; VN.setAliasAnalysis(&RunAA); MD = RunMD; - OrderedInstructions OrderedInstrs(DT); - OI = &OrderedInstrs; VN.setMemDep(MD); ORE = RunORE; @@ -2105,9 +2072,6 @@ DEBUG(verifyRemoved(*I)); (*I)->eraseFromParent(); } - - if (!InstrsToErase.empty()) - OI->invalidateBlock(BB); InstrsToErase.clear(); if (AtStart) @@ -2303,7 +2267,6 @@ MD->removeInstruction(CurInst); DEBUG(verifyRemoved(CurInst)); CurInst->eraseFromParent(); - OI->invalidateBlock(CurrentBlock); ++NumGVNInstr; return true; @@ -2370,7 +2333,6 @@ // RPOT walks the graph in its constructor and will not be invalidated during // processBlock. ReversePostOrderTraversal RPOT(&F); - fillImplicitControlFlowInfo(RPOT); for (BasicBlock *BB : RPOT) Changed |= processBlock(BB); @@ -2382,45 +2344,6 @@ LeaderTable.clear(); BlockRPONumber.clear(); TableAllocator.Reset(); - FirstImplicitControlFlowInsts.clear(); -} - -void -GVN::fillImplicitControlFlowInfo(ReversePostOrderTraversal &RPOT) { - auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) { - // If a block's instruction doesn't always pass the control to its successor - // instruction, mark the block as having implicit control flow. 
We use them - // to avoid wrong assumptions of sort "if A is executed and B post-dominates - // A, then B is also executed". This is not true is there is an implicit - // control flow instruction (e.g. a guard) between them. - // - // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false - // for volatile stores and loads because they can trap. The discussion on - // whether or not it is correct is still ongoing. We might want to get rid - // of this logic in the future. Anyways, trapping instructions shouldn't - // introduce implicit control flow, so we explicitly allow them here. This - // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed. - if (isGuaranteedToTransferExecutionToSuccessor(I)) - return false; - if (isa(I)) { - assert(cast(I)->isVolatile() && - "Non-volatile load should transfer execution to successor!"); - return false; - } - if (isa(I)) { - assert(cast(I)->isVolatile() && - "Non-volatile store should transfer execution to successor!"); - return false; - } - return true; - }; - - for (BasicBlock *BB : RPOT) - for (auto &I : *BB) - if (MayNotTransferExecutionToSuccessor(&I)) { - FirstImplicitControlFlowInsts[BB] = &I; - break; - } } /// Verify that the specified instruction does not occur in our diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/IndVarSimplify.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/IndVarSimplify.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/IndVarSimplify.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/IndVarSimplify.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -25,27 +25,54 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include 
"llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" @@ -53,6 +80,10 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE 
"indvars" @@ -91,6 +122,7 @@ cl::desc("Disable Linear Function Test Replace optimization")); namespace { + struct RewritePhi; class IndVarSimplify { @@ -131,7 +163,8 @@ bool run(Loop *L); }; -} + +} // end anonymous namespace /// Return true if the SCEV expansion generated by the rewriter can replace the /// original value. SCEV guarantees that it produces the same value, but the way @@ -251,7 +284,6 @@ /// is converted into /// for(int i = 0; i < 10000; ++i) /// bar((double)i); -/// void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1; @@ -305,7 +337,6 @@ L->contains(TheBr->getSuccessor(1)))) return; - // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast(Compare->getOperand(1)); @@ -373,7 +404,6 @@ // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; - } else { // If we have a negative stride, we require the init to be greater than the // exit value. @@ -452,7 +482,6 @@ // First step. Check to see if there are any floating-point recurrences. // If there are, change them into integer recurrences, permitting analysis by // the SCEV routines. - // BasicBlock *Header = L->getHeader(); SmallVector PHIs; @@ -472,18 +501,26 @@ } namespace { + // Collect information about PHI nodes which can be transformed in // rewriteLoopExitValues. struct RewritePhi { PHINode *PN; - unsigned Ith; // Ith incoming value. - Value *Val; // Exit value after expansion. - bool HighCost; // High Cost when expansion. + + // Ith incoming value. + unsigned Ith; + + // Exit value after expansion. + Value *Val; + + // High Cost when expansion. 
+ bool HighCost; RewritePhi(PHINode *P, unsigned I, Value *V, bool H) : PN(P), Ith(I), Val(V), HighCost(H) {} }; -} + +} // end anonymous namespace Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, Instruction *InsertPt, @@ -747,7 +784,6 @@ /// aggressively. bool IndVarSimplify::canLoopBeDeleted( Loop *L, SmallVector &RewritePhiSet) { - BasicBlock *Preheader = L->getLoopPreheader(); // If there is no preheader, the loop will not be deleted. if (!Preheader) @@ -790,7 +826,9 @@ } for (auto *BB : L->blocks()) - if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); })) + if (llvm::any_of(*BB, [](Instruction &I) { + return I.mayHaveSideEffects(); + })) return false; return true; @@ -801,15 +839,21 @@ //===----------------------------------------------------------------------===// namespace { + // Collect information about induction variables that are used by sign/zero // extend operations. This information is recorded by CollectExtend and provides // the input to WidenIV. struct WideIVInfo { PHINode *NarrowIV = nullptr; - Type *WidestNativeType = nullptr; // Widest integer type created [sz]ext - bool IsSigned = false; // Was a sext user seen before a zext? + + // Widest integer type created [sz]ext + Type *WidestNativeType = nullptr; + + // Was a sext user seen before a zext? + bool IsSigned = false; }; -} + +} // end anonymous namespace /// Update information about the induction variable that is extended by this /// sign or zero extend operation. This is used to determine the final width of @@ -885,7 +929,6 @@ /// creating any new induction variables. To do this, it creates a new phi of /// the wider type and redirects all users, either removing extends or inserting /// truncs whenever we stop propagating the type. 
-/// class WidenIV { // Parameters PHINode *OrigPhi; @@ -902,22 +945,24 @@ bool HasGuards; // Result - PHINode *WidePhi; - Instruction *WideInc; - const SCEV *WideIncExpr; + PHINode *WidePhi = nullptr; + Instruction *WideInc = nullptr; + const SCEV *WideIncExpr = nullptr; SmallVectorImpl &DeadInsts; SmallPtrSet Widened; SmallVector NarrowIVUsers; enum ExtendKind { ZeroExtended, SignExtended, Unknown }; + // A map tracking the kind of extension used to widen each narrow IV // and narrow IV user. // Key: pointer to a narrow IV or IV user. // Value: the kind of extension used to widen this Instruction. DenseMap, ExtendKind> ExtendKindMap; - typedef std::pair, AssertingVH> DefUserPair; + using DefUserPair = std::pair, AssertingVH>; + // A map with control-dependent ranges for post increment IV uses. The key is // a pair of IV def and a use of this def denoting the context. The value is // a ConstantRange representing possible values of the def at the given @@ -935,6 +980,7 @@ void calculatePostIncRanges(PHINode *OrigPhi); void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); + void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { DefUserPair Key(Def, UseI); auto It = PostIncRangeInfos.find(Key); @@ -950,8 +996,7 @@ bool HasGuards) : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), - WideIncExpr(nullptr), DeadInsts(DI) { + HasGuards(HasGuards), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended; } @@ -969,7 +1014,7 @@ ExtendKind getExtendKind(Instruction *I); - typedef std::pair WidenedRecTy; + using WidenedRecTy = std::pair; WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); @@ -984,7 +1029,8 @@ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; -} // anonymous namespace + +} // end anonymous namespace /// Perform a quick domtree based check for loop invariance assuming that V is /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this @@ -1182,7 +1228,6 @@ /// operands is an AddRec for this loop, return the AddRec and the kind of /// extension used. WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { - // Handle the common case of add const unsigned OpCode = DU.NarrowUse->getOpcode(); // Only Add/Sub/Mul instructions supported yet. @@ -1310,7 +1355,7 @@ Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); unsigned IVWidth = SE->getTypeSizeInBits(WideType); - assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); + assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. IRBuilder<> Builder( @@ -1461,7 +1506,6 @@ } /// Add eligible users of NarrowDef to NarrowIVUsers. -/// void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); bool NonNegativeDef = @@ -1494,7 +1538,6 @@ /// /// It would be simpler to delete uses as they are processed, but we must avoid /// invalidating SCEV expressions. -/// PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); @@ -1696,12 +1739,12 @@ // Live IV Reduction - Minimize IVs live across the loop. 
//===----------------------------------------------------------------------===// - //===----------------------------------------------------------------------===// // Simplification of IV users based on SCEV evaluation. //===----------------------------------------------------------------------===// namespace { + class IndVarSimplifyVisitor : public IVVisitor { ScalarEvolution *SE; const TargetTransformInfo *TTI; @@ -1721,14 +1764,14 @@ // Implement the interface used by simplifyUsersOfIV. void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } }; -} + +} // end anonymous namespace /// Iteratively perform simplification on a worklist of IV users. Each /// successive simplification may push more users which may themselves be /// candidates for simplification. /// /// Sign/Zero extend elimination is interleaved with IV simplification. -/// void IndVarSimplify::simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI) { @@ -2502,8 +2545,10 @@ } namespace { + struct IndVarSimplifyLegacyPass : public LoopPass { static char ID; // Pass identification, replacement for typeid + IndVarSimplifyLegacyPass() : LoopPass(ID) { initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -2530,9 +2575,11 @@ getLoopAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char IndVarSimplifyLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/JumpThreading.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/JumpThreading.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/JumpThreading.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/JumpThreading.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -648,8 +648,6 @@ return true; } - PredValueInfoTy LHSVals, RHSVals; - // Handle 
some boolean conditions. if (I->getType()->getPrimitiveSizeInBits() == 1) { assert(Preference == WantInteger && "One-bit non-integer type?"); @@ -657,6 +655,8 @@ // X & false -> false if (I->getOpcode() == Instruction::Or || I->getOpcode() == Instruction::And) { + PredValueInfoTy LHSVals, RHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, WantInteger, CxtI); ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals, diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopDistribute.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopDistribute.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopDistribute.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopDistribute.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -23,32 +23,61 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopDistribute.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include 
"llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include #include +#include +#include + +using namespace llvm; #define LDIST_NAME "loop-distribute" #define DEBUG_TYPE LDIST_NAME -using namespace llvm; - static cl::opt LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " @@ -81,14 +110,15 @@ STATISTIC(NumLoopsDistributed, "Number of loops distributed"); namespace { + /// \brief Maintains the set of instructions of the loop for a partition before /// cloning. After cloning, it hosts the new loop. class InstPartition { - typedef SmallPtrSet InstructionSet; + using InstructionSet = SmallPtrSet; public: InstPartition(Instruction *I, Loop *L, bool DepCycle = false) - : DepCycle(DepCycle), OrigLoop(L), ClonedLoop(nullptr) { + : DepCycle(DepCycle), OrigLoop(L) { Set.insert(I); } @@ -220,7 +250,7 @@ /// \brief The cloned loop. If this partition is mapped to the original loop, /// this is null. - Loop *ClonedLoop; + Loop *ClonedLoop = nullptr; /// \brief The blocks of ClonedLoop including the preheader. If this /// partition is mapped to the original loop, this is empty. @@ -235,7 +265,7 @@ /// \brief Holds the set of Partitions. It populates them, merges them and then /// clones the loops. 
class InstPartitionContainer { - typedef DenseMap InstToPartitionIdT; + using InstToPartitionIdT = DenseMap; public: InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT) @@ -308,8 +338,8 @@ /// /// Return if any partitions were merged. bool mergeToAvoidDuplicatedLoads() { - typedef DenseMap LoadToPartitionT; - typedef EquivalenceClasses ToBeMergedT; + using LoadToPartitionT = DenseMap; + using ToBeMergedT = EquivalenceClasses; LoadToPartitionT LoadToPartition; ToBeMergedT ToBeMerged; @@ -511,7 +541,7 @@ } private: - typedef std::list PartitionContainerT; + using PartitionContainerT = std::list; /// \brief List of partitions. PartitionContainerT PartitionContainer; @@ -552,17 +582,17 @@ /// By traversing the memory instructions in program order and accumulating this /// number, we know whether any unsafe dependence crosses over a program point. class MemoryInstructionDependences { - typedef MemoryDepChecker::Dependence Dependence; + using Dependence = MemoryDepChecker::Dependence; public: struct Entry { Instruction *Inst; - unsigned NumUnsafeDependencesStartOrEnd; + unsigned NumUnsafeDependencesStartOrEnd = 0; - Entry(Instruction *Inst) : Inst(Inst), NumUnsafeDependencesStartOrEnd(0) {} + Entry(Instruction *Inst) : Inst(Inst) {} }; - typedef SmallVector AccessesType; + using AccessesType = SmallVector; AccessesType::const_iterator begin() const { return Accesses.begin(); } AccessesType::const_iterator end() const { return Accesses.end(); } @@ -594,7 +624,7 @@ public: LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) - : L(L), F(F), LI(LI), LAI(nullptr), DT(DT), SE(SE), ORE(ORE) { + : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) { setForced(); } @@ -861,7 +891,7 @@ // Analyses used. 
LoopInfo *LI; - const LoopAccessInfo *LAI; + const LoopAccessInfo *LAI = nullptr; DominatorTree *DT; ScalarEvolution *SE; OptimizationRemarkEmitter *ORE; @@ -875,6 +905,8 @@ Optional IsForced; }; +} // end anonymous namespace + /// Shared implementation between new and old PMs. static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, @@ -905,9 +937,13 @@ return Changed; } +namespace { + /// \brief The pass class. class LoopDistributeLegacy : public FunctionPass { public: + static char ID; + LoopDistributeLegacy() : FunctionPass(ID) { // The default is set by the caller. initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry()); @@ -938,10 +974,9 @@ AU.addRequired(); AU.addPreserved(); } - - static char ID; }; -} // anonymous namespace + +} // end anonymous namespace PreservedAnalyses LoopDistributePass::run(Function &F, FunctionAnalysisManager &AM) { @@ -975,6 +1010,7 @@ } char LoopDistributeLegacy::ID; + static const char ldist_name[] = "Loop Distribution"; INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, @@ -986,6 +1022,4 @@ INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) -namespace llvm { -FunctionPass *createLoopDistributePass() { return new LoopDistributeLegacy(); } -} +FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopIdiomRecognize.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -1,4 +1,4 @@ -//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===// 
+//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===// // // The LLVM Compiler Infrastructure // @@ -38,32 +38,64 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include 
"llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "loop-idiom" @@ -80,7 +112,7 @@ namespace { class LoopIdiomRecognize { - Loop *CurLoop; + Loop *CurLoop = nullptr; AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; @@ -96,20 +128,21 @@ TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const DataLayout *DL) - : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), - DL(DL) {} + : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {} bool runOnLoop(Loop *L); private: - typedef SmallVector StoreList; - typedef MapVector StoreListMap; + using StoreList = SmallVector; + using StoreListMap = MapVector; + StoreListMap StoreRefsForMemset; StoreListMap StoreRefsForMemsetPattern; StoreList StoreRefsForMemcpy; bool HasMemset; bool HasMemsetPattern; bool HasMemcpy; + /// Return code for isLegalStore() enum LegalStoreKind { None = 0, @@ -164,6 +197,7 @@ class LoopIdiomRecognizeLegacyPass : public LoopPass { public: static char ID; + explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) { initializeLoopIdiomRecognizeLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -190,14 +224,16 @@ /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. - /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); getLoopAnalysisUsage(AU); } }; -} // End anonymous namespace. 
+ +} // end anonymous namespace + +char LoopIdiomRecognizeLegacyPass::ID = 0; PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, @@ -211,7 +247,6 @@ return getLoopPassPreservedAnalyses(); } -char LoopIdiomRecognizeLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) @@ -354,7 +389,6 @@ LoopIdiomRecognize::LegalStoreKind LoopIdiomRecognize::isLegalStore(StoreInst *SI) { - // Don't touch volatile stores. if (SI->isVolatile()) return LegalStoreKind::None; @@ -1488,7 +1522,7 @@ /// PhiX = PHI [InitX, DefX] /// CntInst = CntPhi + 1 /// DefX = PhiX >> 1 -// LOOP_BODY +/// LOOP_BODY /// Br: loop if (DefX != 0) /// Use(CntPhi) or Use(CntInst) /// diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopInstSimplify.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopInstSimplify.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopInstSimplify.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopInstSimplify.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -12,22 +12,33 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopInstSimplify.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" 
-#include "llvm/Support/Debug.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/User.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include + using namespace llvm; #define DEBUG_TYPE "loop-instsimplify" @@ -45,7 +56,7 @@ // The bit we are stealing from the pointer represents whether this basic // block is the header of a subloop, in which case we only process its phis. - typedef PointerIntPair WorklistItem; + using WorklistItem = PointerIntPair; SmallVector VisitStack; SmallPtrSet Visited; @@ -151,9 +162,11 @@ } namespace { + class LoopInstSimplifyLegacyPass : public LoopPass { public: static char ID; // Pass ID, replacement for typeid + LoopInstSimplifyLegacyPass() : LoopPass(ID) { initializeLoopInstSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -181,7 +194,8 @@ getLoopAnalysisUsage(AU); } }; -} + +} // end anonymous namespace PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, @@ -195,6 +209,7 @@ } char LoopInstSimplifyLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(LoopInstSimplifyLegacyPass, "loop-instsimplify", "Simplify instructions in loops", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopInterchange.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopInterchange.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopInterchange.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopInterchange.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -1,4 +1,4 @@ -//===- LoopInterchange.cpp - Loop interchange pass------------------------===// +//===- LoopInterchange.cpp - Loop interchange 
pass-------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,33 +13,38 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include +#include using namespace llvm; @@ -51,10 +56,12 @@ namespace { -typedef SmallVector LoopVector; +using LoopVector = SmallVector; // TODO: Check if we can use a sparse matrix here. 
-typedef std::vector> CharMatrix; +using CharMatrix = std::vector>; + +} // end anonymous namespace // Maximum number of dependencies that can be handled in the dependency matrix. static const unsigned MaxMemInstrCount = 100; @@ -62,10 +69,8 @@ // Maximum loop depth supported. static const unsigned MaxLoopNestDepth = 10; -struct LoopInterchange; - #ifdef DUMP_DEP_MATRICIES -void printDepMatrix(CharMatrix &DepMatrix) { +static void printDepMatrix(CharMatrix &DepMatrix) { for (auto &Row : DepMatrix) { for (auto D : Row) DEBUG(dbgs() << D << " "); @@ -76,7 +81,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI) { - typedef SmallVector ValueVector; + using ValueVector = SmallVector; + ValueVector MemInstr; // For each block. @@ -168,7 +174,7 @@ } // We don't have a DepMatrix to check legality return false. - if (DepMatrix.size() == 0) + if (DepMatrix.empty()) return false; return true; } @@ -213,7 +219,6 @@ static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep) { - if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) return false; @@ -252,7 +257,6 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId) { - unsigned NumRows = DepMatrix.size(); // For each row check if it is valid to interchange. for (unsigned Row = 0; Row < NumRows; ++Row) { @@ -267,7 +271,6 @@ } static void populateWorklist(Loop &L, SmallVector &V) { - DEBUG(dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n'); @@ -317,6 +320,8 @@ return nullptr; } +namespace { + /// LoopInterchangeLegality checks if it is legal to interchange the loop. 
class LoopInterchangeLegality { public: @@ -324,11 +329,12 @@ LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, OptimizationRemarkEmitter *ORE) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {} + PreserveLCSSA(PreserveLCSSA), ORE(ORE) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix); + /// Check if the loop structure is understood. We do not handle triangular /// loops for now. bool isLoopStructureUnderstood(PHINode *InnerInductionVar); @@ -345,6 +351,7 @@ bool findInductionAndReductions(Loop *L, SmallVector &Inductions, SmallVector &Reductions); + Loop *OuterLoop; Loop *InnerLoop; @@ -352,10 +359,11 @@ LoopInfo *LI; DominatorTree *DT; bool PreserveLCSSA; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - bool InnerLoopHasReduction; + bool InnerLoopHasReduction = false; }; /// LoopInterchangeProfitability checks if it is profitable to interchange the @@ -378,6 +386,7 @@ /// Scev analysis. ScalarEvolution *SE; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; }; @@ -412,6 +421,7 @@ /// Scev analysis. ScalarEvolution *SE; + LoopInfo *LI; DominatorTree *DT; BasicBlock *LoopExit; @@ -421,16 +431,16 @@ // Main LoopInterchange Pass. struct LoopInterchange : public FunctionPass { static char ID; - ScalarEvolution *SE; - LoopInfo *LI; - DependenceInfo *DI; - DominatorTree *DT; + ScalarEvolution *SE = nullptr; + LoopInfo *LI = nullptr; + DependenceInfo *DI = nullptr; + DominatorTree *DT = nullptr; bool PreserveLCSSA; + /// Interface to emit optimization remarks. 
OptimizationRemarkEmitter *ORE; - LoopInterchange() - : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) { + LoopInterchange() : FunctionPass(ID) { initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); } @@ -498,7 +508,6 @@ } bool processLoopList(LoopVector LoopList, Function &F) { - bool Changed = false; unsigned LoopNestDepth = LoopList.size(); if (LoopNestDepth < 2) { @@ -577,7 +586,6 @@ bool processLoop(LoopVector LoopList, unsigned InnerLoopId, unsigned OuterLoopId, BasicBlock *LoopNestExit, std::vector> &DependencyMatrix) { - DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); Loop *InnerLoop = LoopList[InnerLoopId]; @@ -611,9 +619,10 @@ } }; -} // end of namespace +} // end anonymous namespace + bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { - return none_of(Ins->users(), [=](User *U) -> bool { + return llvm::none_of(Ins->users(), [=](User *U) -> bool { auto *UserIns = dyn_cast(U); RecurrenceDescriptor RD; return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); @@ -679,10 +688,8 @@ return true; } - bool LoopInterchangeLegality::isLoopStructureUnderstood( PHINode *InnerInduction) { - unsigned Num = InnerInduction->getNumOperands(); BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); for (unsigned i = 0; i < Num; ++i) { @@ -761,7 +768,6 @@ // This function indicates the current limitations in the transform as a result // of which we do not proceed. bool LoopInterchangeLegality::currentLimitations() { - BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); @@ -918,7 +924,7 @@ // instruction. 
bool FoundInduction = false; - for (const Instruction &I : reverse(*InnerLoopLatch)) { + for (const Instruction &I : llvm::reverse(*InnerLoopLatch)) { if (isa(I) || isa(I) || isa(I) || isa(I)) continue; @@ -959,7 +965,6 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId @@ -1101,7 +1106,6 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - // TODO: Add better profitability checks. // e.g // 1) Construct dependency matrix and move the one with no loop carried dep @@ -1167,7 +1171,7 @@ bool Transformed = false; Instruction *InnerIndexVar; - if (InnerLoop->getSubLoops().size() == 0) { + if (InnerLoop->getSubLoops().empty()) { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); DEBUG(dbgs() << "Calling Split Inner Loop\n"); PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); @@ -1181,7 +1185,6 @@ else InnerIndexVar = dyn_cast(InductionPHI->getIncomingValue(0)); - // // Split at the place were the induction variable is // incremented/decremented. // TODO: This splitting logic may not work always. Fix this. @@ -1210,7 +1213,6 @@ } void LoopInterchangeTransform::splitInnerLoopHeader() { - // Split the inner loop header out. Here make sure that the reduction PHI's // stay in the innerloop body. 
BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); @@ -1266,7 +1268,6 @@ } bool LoopInterchangeTransform::adjustLoopBranches() { - DEBUG(dbgs() << "adjustLoopBranches called\n"); // Adjust the loop preheader BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); @@ -1374,8 +1375,8 @@ return true; } -void LoopInterchangeTransform::adjustLoopPreheaders() { +void LoopInterchangeTransform::adjustLoopPreheaders() { // We have interchanged the preheaders so we need to interchange the data in // the preheader as well. // This is because the content of inner preheader was previously executed @@ -1395,7 +1396,6 @@ } bool LoopInterchangeTransform::adjustLoopLinks() { - // Adjust all branches in the inner and outer loop. bool Changed = adjustLoopBranches(); if (Changed) @@ -1404,6 +1404,7 @@ } char LoopInterchange::ID = 0; + INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopLoadElimination.cpp llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopLoadElimination.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lib/Transforms/Scalar/LoopLoadElimination.cpp 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lib/Transforms/Scalar/LoopLoadElimination.cpp 2017-10-17 14:41:13.000000000 +0000 @@ -28,22 +28,29 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include 
"llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include @@ -53,11 +60,11 @@ #include #include +using namespace llvm; + #define LLE_OPTION "loop-load-elim" #define DEBUG_TYPE LLE_OPTION -using namespace llvm; - static cl::opt CheckPerElim( "runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), @@ -127,10 +134,12 @@ #endif }; +} // end anonymous namespace + /// \brief Check if the store dominates all latches, so as long as there is no /// intervening store this value will be loaded in the next iteration. -bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, - DominatorTree *DT) { +static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, + DominatorTree *DT) { SmallVector Latches; L->getLoopLatches(Latches); return llvm::all_of(Latches, [&](const BasicBlock *Latch) { @@ -143,6 +152,8 @@ return Load->getParent() != L->getHeader(); } +namespace { + /// \brief The per-loop class that does most of the work. class LoadEliminationForLoop { public: @@ -241,8 +252,8 @@ std::forward_list &Candidates) { // If Store is nullptr it means that we have multiple stores forwarding to // this store. - typedef DenseMap - LoadToSingleCandT; + using LoadToSingleCandT = + DenseMap; LoadToSingleCandT LoadToSingleCand; for (const auto &Cand : Candidates) { @@ -393,7 +404,6 @@ void propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand, SCEVExpander &SEE) { - // // loop: // %x = load %gep_i // = ... 
%x @@ -431,6 +441,7 @@ bool processLoop() { DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName() << "\" checking " << *L << "\n"); + // Look for store-to-load forwarding cases across the // backedge. E.g.: // @@ -558,6 +569,8 @@ PredicatedScalarEvolution PSE; }; +} // end anonymous namespace + static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, function_ref GetLAI) { @@ -584,10 +597,14 @@ return Changed; } +namespace { + /// \brief The pass. Most of the work is delegated to the per-loop /// LoadEliminationForLoop class. class LoopLoadElimination : public FunctionPass { public: + static char ID; + LoopLoadElimination() : FunctionPass(ID) { initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry()); } @@ -616,13 +633,12 @@ AU.addPreserved(); AU.addPreserved(); } - - static char ID; }; } // end anonymous namespace char LoopLoadElimination::ID; + static const char LLE_name[] = "Loop Load Elimination"; INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) @@ -633,9 +649,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) -namespace llvm { - -FunctionPass *createLoopLoadEliminationPass() { +FunctionPass *llvm::createLoopLoadEliminationPass() { return new LoopLoadElimination(); } @@ -662,5 +676,3 @@ PreservedAnalyses PA; return PA; } - -} // end namespace llvm diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/COFF/Driver.cpp llvm-toolchain-snapshot-6.0~svn316003/lld/COFF/Driver.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lld/COFF/Driver.cpp 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/COFF/Driver.cpp 2017-10-17 14:42:28.000000000 +0000 @@ -113,16 +113,15 @@ void LinkerDriver::addBuffer(std::unique_ptr MB, bool WholeArchive) { MemoryBufferRef MBRef = takeBuffer(std::move(MB)); + FilePaths.push_back(MBRef.getBufferIdentifier()); // File type is detected by contents, not by file 
extension. - file_magic Magic = identify_magic(MBRef.getBuffer()); - if (Magic == file_magic::windows_resource) { + switch (identify_magic(MBRef.getBuffer())) { + case file_magic::windows_resource: Resources.push_back(MBRef); - return; - } + break; - FilePaths.push_back(MBRef.getBufferIdentifier()); - if (Magic == file_magic::archive) { + case file_magic::archive: if (WholeArchive) { std::unique_ptr File = check(Archive::create(MBRef), @@ -133,19 +132,21 @@ return; } Symtab->addFile(make(MBRef)); - return; - } + break; - if (Magic == file_magic::bitcode) { + case file_magic::bitcode: Symtab->addFile(make(MBRef)); - return; - } + break; - if (Magic == file_magic::coff_cl_gl_object) + case file_magic::coff_cl_gl_object: error(MBRef.getBufferIdentifier() + ": is not a native COFF file. " "Recompile without /GL"); - else + break; + + default: Symtab->addFile(make(MBRef)); + break; + } } void LinkerDriver::enqueuePath(StringRef Path, bool WholeArchive) { @@ -547,7 +548,7 @@ } } -// Get a sorted list of symbols not to automatically export +// Get a set of symbols not to automatically export // when exporting all global symbols for MinGW. static StringSet<> getExportExcludeSymbols() { if (Config->Machine == I386) @@ -555,7 +556,7 @@ "__NULL_IMPORT_DESCRIPTOR", "__pei386_runtime_relocator", "_do_pseudo_reloc", - "_impure_ptr", + "_impure_ptr", "__impure_ptr", "__fmode", "_environ", @@ -1078,7 +1079,7 @@ // WindowsResource to convert resource files to a regular COFF file, // then link the resulting file normally. 
if (!Resources.empty()) - addBuffer(convertResToCOFF(Resources), false); + Symtab->addFile(make(convertResToCOFF(Resources))); if (Tar) Tar->append("response.txt", diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/COFF/Driver.h llvm-toolchain-snapshot-6.0~svn316003/lld/COFF/Driver.h --- llvm-toolchain-snapshot-6.0~svn315865/lld/COFF/Driver.h 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/COFF/Driver.h 2017-10-17 14:42:28.000000000 +0000 @@ -169,10 +169,8 @@ // incompatible objects. void checkFailIfMismatch(StringRef Arg); -// Convert Windows resource files (.res files) to a .obj file -// using cvtres.exe. -std::unique_ptr -convertResToCOFF(const std::vector &MBs); +// Convert Windows resource files (.res files) to a .obj file. +MemoryBufferRef convertResToCOFF(const std::vector &MBs); void runMSVCLinker(std::string Rsp, ArrayRef Objects); diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/COFF/DriverUtils.cpp llvm-toolchain-snapshot-6.0~svn316003/lld/COFF/DriverUtils.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lld/COFF/DriverUtils.cpp 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/COFF/DriverUtils.cpp 2017-10-17 14:42:28.000000000 +0000 @@ -423,7 +423,8 @@ sizeof(object::WinResHeaderPrefix) + sizeof(object::WinResIDs) + sizeof(object::WinResHeaderSuffix) + ManifestSize, object::WIN_RES_DATA_ALIGNMENT); - return MemoryBuffer::getNewMemBuffer(ResSize); + return MemoryBuffer::getNewMemBuffer(ResSize, + Config->OutputFile + ".manifest.res"); } static void writeResFileHeader(char *&Buf) { @@ -638,10 +639,8 @@ Config->MustMatch[K] = V; } -// Convert Windows resource files (.res files) to a .obj file -// using cvtres.exe. -std::unique_ptr -convertResToCOFF(const std::vector &MBs) { +// Convert Windows resource files (.res files) to a .obj file. 
+MemoryBufferRef convertResToCOFF(const std::vector &MBs) { object::WindowsResourceParser Parser; for (MemoryBufferRef MB : MBs) { @@ -657,7 +656,10 @@ llvm::object::writeWindowsResourceCOFF(Config->Machine, Parser); if (!E) fatal(errorToErrorCode(E.takeError()), "failed to write .res to COFF"); - return std::move(E.get()); + + MemoryBufferRef MBRef = **E; + make>(std::move(*E)); // take ownership + return MBRef; } // Run MSVC link.exe for given in-memory object files. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/Common/CMakeLists.txt llvm-toolchain-snapshot-6.0~svn316003/lld/Common/CMakeLists.txt --- llvm-toolchain-snapshot-6.0~svn315865/lld/Common/CMakeLists.txt 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/Common/CMakeLists.txt 2017-10-17 14:42:28.000000000 +0000 @@ -18,6 +18,9 @@ Support Target + LINK_LIBS + ${LLVM_PTHREAD_LIB} + DEPENDS ${tablegen_deps} ) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Arch/AMDGPU.cpp llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Arch/AMDGPU.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Arch/AMDGPU.cpp 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Arch/AMDGPU.cpp 2017-10-17 14:42:28.000000000 +0000 @@ -32,7 +32,7 @@ } // namespace AMDGPU::AMDGPU() { - RelativeRel = R_AMDGPU_REL64; + RelativeRel = R_AMDGPU_RELATIVE64; GotRel = R_AMDGPU_ABS64; GotEntrySize = 8; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Config.h llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Config.h --- llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Config.h 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Config.h 2017-10-17 14:42:28.000000000 +0000 @@ -72,7 +72,6 @@ // and such fields have the same name as the corresponding options. // Most fields are initialized by the driver. 
struct Configuration { - InputFile *FirstElf = nullptr; uint8_t OSABI = 0; llvm::CachePruningPolicy ThinLTOCachePolicy; llvm::StringMap SectionStartMap; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Options.td llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Options.td --- llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Options.td 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Options.td 2017-10-17 14:42:28.000000000 +0000 @@ -5,7 +5,6 @@ class F: Flag<["--", "-"], name>; class J: Joined<["--", "-"], name>; class S: Separate<["--", "-"], name>; -class JS: JoinedOrSeparate<["--", "-"], name>; multiclass Eq { def "": Separate<["--", "-"], name>; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Relocations.cpp llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Relocations.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Relocations.cpp 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Relocations.cpp 2017-10-17 14:42:28.000000000 +0000 @@ -304,8 +304,14 @@ case R_MIPS_HI16: return R_MIPS_LO16; case R_MIPS_GOT16: - // I don't know why these relocations had to be defined like this, - // but they are handled differently when they refer to local symbols. + // In case of global symbol, the R_MIPS_GOT16 relocation does not + // have a pair. Each global symbol has a unique entry in the GOT + // and a corresponding instruction with help of the R_MIPS_GOT16 + // relocation loads an address of the symbol. In case of local + // symbol, the R_MIPS_GOT16 relocation creates a GOT entry to hold + // the high 16 bits of the symbol's value. A paired R_MIPS_LO16 + // relocations handle low 16 bits of the address. That allows + // to allocate only one GOT entry for every 64 KBytes of local data. return IsLocal ? R_MIPS_LO16 : R_MIPS_NONE; case R_MICROMIPS_GOT16: return IsLocal ? 
R_MICROMIPS_LO16 : R_MIPS_NONE; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/SymbolTable.cpp llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/SymbolTable.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/SymbolTable.cpp 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/SymbolTable.cpp 2017-10-17 14:42:28.000000000 +0000 @@ -31,6 +31,14 @@ SymbolTable *elf::Symtab; +static InputFile *getFirstElf() { + if (!ObjectFiles.empty()) + return ObjectFiles[0]; + if (!SharedFiles.empty()) + return SharedFiles[0]; + return nullptr; +} + // All input object files must be for the same architecture // (e.g. it does not make sense to link x86 object files with // MIPS object files.) This function checks for that error. @@ -48,15 +56,12 @@ if (!Config->Emulation.empty()) error(toString(F) + " is incompatible with " + Config->Emulation); else - error(toString(F) + " is incompatible with " + toString(Config->FirstElf)); + error(toString(F) + " is incompatible with " + toString(getFirstElf())); return false; } // Add symbols in File to the symbol table. 
template void SymbolTable::addFile(InputFile *File) { - if (!Config->FirstElf && isa>(File)) - Config->FirstElf = File; - if (!isCompatible(File)) return; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Writer.cpp llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Writer.cpp --- llvm-toolchain-snapshot-6.0~svn315865/lld/ELF/Writer.cpp 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/ELF/Writer.cpp 2017-10-17 14:42:28.000000000 +0000 @@ -930,11 +930,10 @@ if (OutputSections[I] == LastRW->FirstSec) break; - for (; I < OutputSections.size(); ++I) { - if (OutputSections[I]->Type != SHT_NOBITS) - continue; - break; - } + for (; I < OutputSections.size(); ++I) + if (OutputSections[I]->Type == SHT_NOBITS) + break; + if (ElfSym::Edata1) ElfSym::Edata1->Section = OutputSections[I - 1]; if (ElfSym::Edata2) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/test/COFF/export-all.s llvm-toolchain-snapshot-6.0~svn316003/lld/test/COFF/export-all.s --- llvm-toolchain-snapshot-6.0~svn315865/lld/test/COFF/export-all.s 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/test/COFF/export-all.s 2017-10-17 14:42:28.000000000 +0000 @@ -16,7 +16,7 @@ _foobar: ret -# Test specifying -export-all-symbols, on an object file that contains axi +# Test specifying -export-all-symbols, on an object file that contains # dllexport directive for some of the symbols. 
# RUN: yaml2obj < %p/Inputs/export.yaml > %t.obj diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/test/COFF/linkrepro-res.test llvm-toolchain-snapshot-6.0~svn316003/lld/test/COFF/linkrepro-res.test --- llvm-toolchain-snapshot-6.0~svn315865/lld/test/COFF/linkrepro-res.test 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/test/COFF/linkrepro-res.test 2017-10-17 14:42:28.000000000 +0000 @@ -0,0 +1,12 @@ +# REQUIRES: x86, shell + +# RUN: rm -rf %t.dir +# RUN: mkdir -p %t.dir/build +# RUN: cd %t.dir/build +# RUN: lld-link %p/Inputs/resource.res /subsystem:console /machine:x64 \ +# RUN: /entry:__ImageBase /linkrepro:. /out:%t.exe +# RUN: tar xf repro.tar +# RUN: diff %p/Inputs/resource.res repro/%:p/Inputs/resource.res +# RUN: FileCheck %s --check-prefix=RSP < repro/response.txt + +# RSP: resource.res diff -Nru llvm-toolchain-snapshot-6.0~svn315865/lld/test/ELF/amdgpu-relocs.s llvm-toolchain-snapshot-6.0~svn316003/lld/test/ELF/amdgpu-relocs.s --- llvm-toolchain-snapshot-6.0~svn315865/lld/test/ELF/amdgpu-relocs.s 2017-10-15 17:44:34.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/lld/test/ELF/amdgpu-relocs.s 2017-10-17 14:42:28.000000000 +0000 @@ -65,10 +65,23 @@ .quad temp .size ptr, 8 +# R_AMDGPU_RELATIVE64: + .type temp2, @object + .local temp2 + .size temp2, 4 + + .type ptr2, @object + .globl ptr2 + .size ptr2, 8 + .p2align 3 +ptr2: + .quad temp2 + # The relocation for local_var{0, 1, 2} and var should be resolved by the # linker. 
# CHECK: Relocations [ # CHECK: .rela.dyn { +# CHECK-NEXT: R_AMDGPU_RELATIVE64 - 0x0 # CHECK-NEXT: R_AMDGPU_ABS64 common_var0 0x0 # CHECK-NEXT: R_AMDGPU_ABS64 common_var1 0x0 # CHECK-NEXT: R_AMDGPU_ABS64 common_var2 0x0 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/polly/test/Isl/CodeGen/large-numbers-in-boundary-context.ll llvm-toolchain-snapshot-6.0~svn316003/polly/test/Isl/CodeGen/large-numbers-in-boundary-context.ll --- llvm-toolchain-snapshot-6.0~svn315865/polly/test/Isl/CodeGen/large-numbers-in-boundary-context.ll 2017-10-15 17:44:27.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/polly/test/Isl/CodeGen/large-numbers-in-boundary-context.ll 2017-10-17 14:42:23.000000000 +0000 @@ -1,4 +1,5 @@ ; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s +; XFAIL: * ; ; The boundary context contains a constant that does not fit in 64 bits. Hence, ; make sure we bail out. On certain systems, e.g. AOSP, no runtime support for diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir 2017-10-17 14:41:14.000000000 +0000 @@ -66,6 +66,9 @@ define void @bitcast_s64_fpr() { ret void } define void @bitcast_s64_gpr_fpr() { ret void } define void @bitcast_s64_fpr_gpr() { ret void } + define void @bitcast_s128() { ret void } + define void @copy_s128() { ret void } + define void @copy_s128_from_load() { ret void } define i64 @greedyWithChainOfComputation(i64 %arg1, <2 x i32>* %addr) { %varg1 = bitcast i64 %arg1 to <2 x i32> @@ -616,6 +619,95 @@ ... 
--- +# CHECK-LABEL: name: bitcast_s128 +name: bitcast_s128 +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _} + - { id: 1, class: _} + - { id: 2, class: _} + - { id: 3, class: _} +# CHECK: registers: +# CHECK: - { id: 2, class: fpr, preferred-register: '' } +# CHECK: - { id: 3, class: fpr, preferred-register: '' } +# CHECK: %2(<2 x s64>) = G_BITCAST %3(s128) +body: | + bb.1: + liveins: %x0, %x1 + %0(s64) = COPY %x0 + %1(s64) = COPY %x1 + %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) + %2(<2 x s64>) = G_BITCAST %3(s128) + %q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit %q0 + +... + +--- +# CHECK-LABEL: name: copy_s128 +# This test checks that we issue the proper mapping +# for copy of size > 64. +# The mapping should be the same as G_BITCAST. +name: copy_s128 +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _} + - { id: 1, class: _} + - { id: 2, class: _} + - { id: 3, class: _} + - { id: 4, class: _} +# CHECK: registers: +# CHECK: - { id: 2, class: fpr, preferred-register: '' } +# CHECK: - { id: 3, class: fpr, preferred-register: '' } +# CHECK: - { id: 4, class: fpr, preferred-register: '' } +# CHECK: %4(s128) = COPY %3(s128) +# CHECK-NEXT: %2(<2 x s64>) = G_BITCAST %4(s128) +body: | + bb.1: + liveins: %x0, %x1 + %0(s64) = COPY %x0 + %1(s64) = COPY %x1 + %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) + %4(s128) = COPY %3(s128) + %2(<2 x s64>) = G_BITCAST %4(s128) + %q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit %q0 + +... + +--- +# CHECK-LABEL: name: copy_s128_from_load +# This test checks that we issue the proper mapping +# for copy of size > 64 when the input is neither +# a physcal register nor a generic register. +# This used to crash when we moved to the statically +# computed mapping, because we were assuming non-physregs +# were generic registers and thus have a type, whereas +# it is not necessarily the case. 
+name: copy_s128_from_load +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr128} + - { id: 1, class: _} +# CHECK: registers: +# CHECK: - { id: 0, class: fpr128, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } +# CHECK: %1(s128) = COPY %0 +body: | + bb.1: + liveins: %x0 + %0 = LDRQui killed %x0, 0 + %1(s128) = COPY %0 + %q0 = COPY %1(s128) + RET_ReallyLR implicit %q0 + +... + + +--- # Make sure the greedy mode is able to take advantage of the # alternative mappings of G_LOAD to coalesce the whole chain # of computation on GPR. diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir 2017-10-17 14:41:14.000000000 +0000 @@ -9,6 +9,9 @@ next: ret void } + define void @bitcast128() { + ret void + } ... --- @@ -84,3 +87,26 @@ %15(<4 x s8>) = G_BITCAST %0 %16(<2 x s16>) = G_BITCAST %0 ... + +--- +name: bitcast128 +tracksRegLiveness: true +registers: + - { id: 0, class: _} + - { id: 1, class: _} + - { id: 2, class: _} + - { id: 3, class: _} +body: | + bb.1: + liveins: %x0, %x1 + ; CHECK-LABEL: bitcast128 + ; This is legal and shouldn't be changed. + ; CHECK: %2(<2 x s64>) = G_BITCAST %3(s128) + %0(s64) = COPY %x0 + %1(s64) = COPY %x1 + %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) + %2(<2 x s64>) = G_BITCAST %3(s128) + %q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit %q0 + +... 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/AArch64/GlobalISel/select-load.mir llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/AArch64/GlobalISel/select-load.mir --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/AArch64/GlobalISel/select-load.mir 2017-10-15 17:43:15.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/AArch64/GlobalISel/select-load.mir 2017-10-17 14:41:14.000000000 +0000 @@ -28,6 +28,7 @@ define void @load_gep_64_s16_fpr(i16* %addr) { ret void } define void @load_gep_32_s8_fpr(i8* %addr) { ret void } + define void @load_v2s32(i64 *%addr) { ret void } ... --- @@ -513,3 +514,28 @@ %3(s8) = G_LOAD %2 :: (load 1 from %ir.addr) %b0 = COPY %3 ... +--- +# CHECK-LABEL: name: load_v2s32 +name: load_v2s32 +legalized: true +regBankSelected: true + +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +registers: + - { id: 0, class: gpr } + - { id: 1, class: fpr } + +# CHECK: body: +# CHECK: %0 = COPY %x0 +# CHECK: %1 = LDRDui %0, 0 :: (load 8 from %ir.addr) +# CHECK: %d0 = COPY %1 +body: | + bb.0: + liveins: %x0 + + %0(p0) = COPY %x0 + %1(<2 x s32>) = G_LOAD %0 :: (load 8 from %ir.addr) + %d0 = COPY %1(<2 x s32>) +... 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll 2017-10-17 14:41:14.000000000 +0000 @@ -0,0 +1,95 @@ +; RUN: llc -march=amdgcn < %s | FileCheck %s + +; Check we can compile this bugpoint-reduced test without an +; infinite loop in TLI.SimplifyDemandedBits() due to failure +; to use return value of TLO.DAG.UpdateNodeOperands() + +; Check that code was generated; we know there will be +; a s_endpgm, so check for it. + +@0 = external unnamed_addr addrspace(3) global [462 x float], align 4 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.y() #0 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +; Function Attrs: nounwind readnone speculatable +declare float @llvm.fmuladd.f32(float, float, float) #0 + +; CHECK: s_endpgm +define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1, float addrspace(1)* noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.y() + %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp5 = and i32 %tmp, 15 + %tmp6 = mul nuw nsw i32 %tmp5, 21 + %tmp7 = sub i32 %tmp6, 0 + %tmp8 = add i32 %tmp7, 0 + %tmp9 = add i32 %tmp8, 0 + %tmp10 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0 + br label %bb12 + +bb11: ; preds = %bb30 + br i1 undef, label %bb37, label %bb38 + +bb12: ; preds = %bb30, %bb + br i1 false, label %.preheader, label %.loopexit145 + +.loopexit145: ; preds = %.preheader, %bb12 + br 
label %bb13 + +bb13: ; preds = %.loopexit, %.loopexit145 + %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ] + %tmp15 = add nsw i32 %tmp14, -3 + %tmp16 = mul i32 %tmp14, 21 + br i1 undef, label %bb17, label %.loopexit + +bb17: ; preds = %bb13 + %tmp18 = mul i32 %tmp15, 224 + %tmp19 = add i32 undef, %tmp18 + br label %bb21 + +.loopexit: ; preds = %bb21, %bb13 + %tmp20 = add nuw nsw i32 %tmp14, 16 + br i1 undef, label %bb13, label %bb26 + +bb21: ; preds = %bb21, %bb17 + %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ] + %tmp23 = add i32 %tmp22, %tmp16 + %tmp24 = getelementptr inbounds float, float addrspace(3)* %tmp10, i32 %tmp23 + store float undef, float addrspace(3)* %tmp24, align 4 + %tmp25 = add nuw i32 %tmp22, 8 + br i1 undef, label %bb21, label %.loopexit + +bb26: ; preds = %.loopexit + br label %bb31 + +.preheader: ; preds = %.preheader, %bb12 + %tmp27 = phi i32 [ %tmp28, %.preheader ], [ undef, %bb12 ] + %tmp28 = add nuw i32 %tmp27, 128 + %tmp29 = icmp ult i32 %tmp28, 1568 + br i1 %tmp29, label %.preheader, label %.loopexit145 + +bb30: ; preds = %bb31 + br i1 undef, label %bb11, label %bb12 + +bb31: ; preds = %bb31, %bb26 + %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ] + %tmp33 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 %tmp32 + %tmp34 = load float, float addrspace(3)* %tmp33, align 4 + %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef) + %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35) + br i1 undef, label %bb30, label %bb31 + +bb37: ; preds = %bb11 + br label %bb38 + +bb38: ; preds = %bb37, %bb11 + ret void +} + +attributes #0 = { nounwind readnone speculatable } + +!0 = !{i32 8, i32 16, i32 1} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/BPF/select_ri.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/BPF/select_ri.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/BPF/select_ri.ll 2017-10-15 
17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/BPF/select_ri.ll 2017-10-17 14:41:14.000000000 +0000 @@ -25,3 +25,38 @@ } attributes #0 = { norecurse nounwind readonly } + +; test immediate out of 32-bit range +; Source file: + +; unsigned long long +; load_word(void *buf, unsigned long long off) +; asm("llvm.bpf.load.word"); +; +; int +; foo(void *buf) +; { +; unsigned long long sum = 0; +; +; sum += load_word(buf, 100); +; sum += load_word(buf, 104); +; +; if (sum != 0x1ffffffffULL) +; return ~0U; +; +; return 0; +;} + +; Function Attrs: nounwind readonly +define i32 @foo(i8*) local_unnamed_addr #0 { + %2 = tail call i64 @llvm.bpf.load.word(i8* %0, i64 100) + %3 = tail call i64 @llvm.bpf.load.word(i8* %0, i64 104) + %4 = add i64 %3, %2 + %5 = icmp ne i64 %4, 8589934591 +; CHECK: r{{[0-9]+}} = 8589934591 ll + %6 = sext i1 %5 to i32 + ret i32 %6 +} + +; Function Attrs: nounwind readonly +declare i64 @llvm.bpf.load.word(i8*, i64) #1 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/Mips/constMaterialization.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/Mips/constMaterialization.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/Mips/constMaterialization.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/Mips/constMaterialization.ll 2017-10-17 14:41:14.000000000 +0000 @@ -0,0 +1,136 @@ +; RUN: llc -march=mips < %s | FileCheck %s -check-prefixes=ALL,MIPS +; RUN: llc -march=mips < %s -mattr=+micromips | FileCheck %s -check-prefixes=ALL,MM + +; Test the patterns used for constant materialization. 
+ +; Constants generated using li16 +define i32 @Li16LowBoundary() { +entry: + ; ALL-LABEL: Li16LowBoundary: + ; MIPS: addiu $2, $zero, -1 + ; MM: li16 $2, -1 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; MIPS-NOT: li16 + ; MM-NOT: addiu + + ret i32 -1 +} + +define i32 @Li16HighBoundary() { +entry: + ; ALL-LABEL: Li16HighBoundary: + ; MIPS: addiu $2, $zero, 126 + ; MM: li16 $2, 126 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; MM-NOT: addiu + ; MIPS-NOT: li16 + + ret i32 126 +} + +; Constants generated using addiu +define i32 @AddiuLowBoundary() { +entry: + ; ALL-LABEL: AddiuLowBoundary: + ; ALL: addiu $2, $zero, -32768 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; ALL-NOT: li16 + + ret i32 -32768 +} + +define i32 @AddiuZero() { +entry: + ; ALL-LABEL: AddiuZero: + ; MIPS: addiu $2, $zero, 0 + ; MM: li16 $2, 0 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; MIPS-NOT: li16 + ; MM-NOT: addiu + + ret i32 0 +} + +define i32 @AddiuHighBoundary() { +entry: + ; ALL-LABEL: AddiuHighBoundary: + ; ALL: addiu $2, $zero, 32767 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; ALL-NOT: li16 + + ret i32 32767 +} + +; Constants generated using ori +define i32 @OriLowBoundary() { +entry: + ; ALL-LABEL: OriLowBoundary: + ; ALL: ori $2, $zero, 32768 + ; ALL-NOT: addiu + ; ALL-NOT: lui + ; ALL-NOT: li16 + + ret i32 32768 +} + +define i32 @OriHighBoundary() { +entry: + ; ALL-LABEL: OriHighBoundary: + ; ALL: ori $2, $zero, 65535 + ; ALL-NOT: addiu + ; ALL-NOT: lui + ; ALL-NOT: li16 + + ret i32 65535 +} + +; Constants generated using lui +define i32 @LuiPositive() { +entry: + ; ALL-LABEL: LuiPositive: + ; ALL: lui $2, 1 + ; ALL-NOT: addiu + ; ALL-NOT: ori + ; ALL-NOT: li16 + + ret i32 65536 +} + +define i32 @LuiNegative() { +entry: + ; ALL-LABEL: LuiNegative: + ; ALL: lui $2, 65535 + ; ALL-NOT: addiu + ; ALL-NOT: ori + ; ALL-NOT: li16 + + ret i32 -65536 +} + +; Constants generated using a combination of lui and ori +define i32 @LuiWithLowBitsSet() { +entry: + ; ALL-LABEL: LuiWithLowBitsSet: + ; ALL: lui $1, 1 + ; ALL: ori $2, $1, 1 
+ ; ALL-NOT: addiu + ; ALL-NOT: li16 + + ret i32 65537 +} + +define i32 @BelowAddiuLowBoundary() { +entry: + ; ALL-LABEL: BelowAddiuLowBoundary: + ; ALL: lui $1, 65535 + ; ALL: ori $2, $1, 32767 + ; ALL-NOT: addiu + ; ALL-NOT: li16 + + ret i32 -32769 +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/PowerPC/expand-isel.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/PowerPC/expand-isel.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/PowerPC/expand-isel.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/PowerPC/expand-isel.ll 2017-10-17 14:41:14.000000000 +0000 @@ -215,9 +215,7 @@ ; CHECK-LABEL: @testComplexISEL ; CHECK-DAG: [[LI:r[0-9]+]], 1 ; CHECK-DAG: cmplwi [[LD:r[0-9]+]], 0 -; CHECK: beq cr0, [[EQ:.LBB[0-9_]+]] -; CHECK: blr -; CHECK: [[EQ]] +; CHECK: bnelr cr0 ; CHECK: xor [[XOR:r[0-9]+]] ; CHECK: cntlzd [[CZ:r[0-9]+]], [[XOR]] ; CHECK: rldicl [[SH:r[0-9]+]], [[CZ]], 58, 63 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll 2017-10-17 14:41:14.000000000 +0000 @@ -45,13 +45,9 @@ ; CHECK-NEXT: ld 4, 8(4) ; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: beq 0, .LBB1_3 +; CHECK-NEXT: beqlr 0 ; CHECK-NEXT: .LBB1_2: # %res_block ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: clrldi 3, 3, 32 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB1_3: # %endblock -; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16) %not.tobool = icmp ne i32 %call, 0 @@ -77,13 +73,9 @@ ; CHECK-NEXT: lbz 4, 6(4) ; CHECK-NEXT: cmplw 3, 4 ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: beq 0, .LBB2_4 
+; CHECK-NEXT: beqlr 0 ; CHECK-NEXT: .LBB2_3: # %res_block ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: clrldi 3, 3, 32 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB2_4: # %endblock -; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7) %not.lnot = icmp ne i32 %call, 0 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll 2017-10-17 14:41:14.000000000 +0000 @@ -328,7 +328,6 @@ ; CHECK-LABEL: @getuc0 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 8, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc0 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: clrldi 3, 3, 56 @@ -342,11 +341,9 @@ ; CHECK-LABEL: @getuc1 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 16, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc1 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 56, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -357,11 +354,9 @@ ; CHECK-LABEL: @getuc2 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 24, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc2 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 48, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -372,11 +367,9 @@ ; CHECK-LABEL: @getuc3 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 32, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc3 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 40, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -387,11 +380,9 @@ ; CHECK-LABEL: @getuc4 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 40, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc4 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 32, 56 -; 
CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -402,11 +393,9 @@ ; CHECK-LABEL: @getuc5 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 48, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc5 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 24, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -417,11 +406,9 @@ ; CHECK-LABEL: @getuc6 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 56, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc6 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 16, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -435,7 +422,6 @@ ; CHECK-LE-LABEL: @getuc7 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 8, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -446,7 +432,6 @@ ; CHECK-LABEL: @getuc8 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 8, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc8 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: clrldi 3, 3, 56 @@ -460,11 +445,9 @@ ; CHECK-LABEL: @getuc9 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 16, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc9 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 56, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -475,11 +458,9 @@ ; CHECK-LABEL: @getuc10 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 24, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc10 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 48, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -490,11 +471,9 @@ ; CHECK-LABEL: @getuc11 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 32, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc11 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 40, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -505,11 +484,9 @@ ; CHECK-LABEL: @getuc12 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 40, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc12 ; CHECK-LE: 
mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 32, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -520,11 +497,9 @@ ; CHECK-LABEL: @getuc13 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 48, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc13 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 24, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -535,11 +510,9 @@ ; CHECK-LABEL: @getuc14 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 56, 56 -; CHECK: clrldi 3, 3, 56 ; CHECK-LE-LABEL: @getuc14 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 16, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -553,7 +526,6 @@ ; CHECK-LE-LABEL: @getuc15 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 8, 56 -; CHECK-LE: clrldi 3, 3, 56 } ; Function Attrs: norecurse nounwind readnone @@ -739,7 +711,6 @@ ; CHECK-LABEL: @getus0 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 16, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus0 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: clrldi 3, 3, 48 @@ -753,11 +724,9 @@ ; CHECK-LABEL: @getus1 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 32, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus1 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 48, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -768,11 +737,9 @@ ; CHECK-LABEL: @getus2 ; CHECK: mfvsrd 3, 34 ; CHECK: rldicl 3, 3, 48, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus2 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 32, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -786,7 +753,6 @@ ; CHECK-LE-LABEL: @getus3 ; CHECK-LE: mfvsrd 3, ; CHECK-LE: rldicl 3, 3, 16, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -797,7 +763,6 @@ ; CHECK-LABEL: @getus4 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 16, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus4 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: clrldi 3, 3, 48 @@ -811,11 
+776,9 @@ ; CHECK-LABEL: @getus5 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 32, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus5 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 48, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -826,11 +789,9 @@ ; CHECK-LABEL: @getus6 ; CHECK: mfvsrd 3, ; CHECK: rldicl 3, 3, 48, 48 -; CHECK: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getus6 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 32, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -844,7 +805,6 @@ ; CHECK-LE-LABEL: @getus7 ; CHECK-LE: mfvsrd 3, 34 ; CHECK-LE: rldicl 3, 3, 16, 48 -; CHECK-LE: clrldi 3, 3, 48 } ; Function Attrs: norecurse nounwind readnone @@ -973,11 +933,9 @@ ; CHECK-LABEL: @getui0 ; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 ; CHECK: mfvsrwz 3, [[SHL]] -; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui0 ; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34 ; CHECK-LE: mfvsrwz 3, [[SHL]] -; CHECK-LE: clrldi 3, 3, 32 } ; Function Attrs: norecurse nounwind readnone @@ -987,11 +945,9 @@ ret i32 %vecext ; CHECK-LABEL: @getui1 ; CHECK: mfvsrwz 3, 34 -; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui1 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 ; CHECK-LE: mfvsrwz 3, [[SHL]] -; CHECK-LE: clrldi 3, 3, 32 } ; Function Attrs: norecurse nounwind readnone @@ -1002,10 +958,8 @@ ; CHECK-LABEL: @getui2 ; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 ; CHECK: mfvsrwz 3, [[SHL]] -; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui2 ; CHECK-LE: mfvsrwz 3, 34 -; CHECK-LE: clrldi 3, 3, 32 } ; Function Attrs: norecurse nounwind readnone @@ -1016,11 +970,9 @@ ; CHECK-LABEL: @getui3 ; CHECK: xxswapd [[SHL:[0-9]+]], 34 ; CHECK: mfvsrwz 3, [[SHL]] -; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui3 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3 ; CHECK-LE: mfvsrwz 3, [[SHL]] -; CHECK-LE: clrldi 3, 3, 32 } ; Function Attrs: norecurse nounwind readnone diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll 2017-10-17 14:41:14.000000000 +0000 @@ -31,7 +31,7 @@ ; CHECK-LABEL: limit_loop ; CHECK: mtctr ; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, 1 -; CHECK: bdnz +; CHECK: bdzlr ; CHECK: blr } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/3dnow-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/3dnow-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/3dnow-schedule.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/3dnow-schedule.ll 2017-10-17 14:41:14.000000000 +0000 @@ -0,0 +1,372 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+3dnowa | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC + +define void @test_femms() optsize { +; CHECK-LABEL: test_femms: +; CHECK: # BB#0: +; CHECK-NEXT: femms +; CHECK-NEXT: retq # sched: [1:1.00] + call void @llvm.x86.mmx.femms() + ret void +} +declare void @llvm.x86.mmx.femms() nounwind readnone + +define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pavgusb: +; CHECK: # BB#0: +; CHECK-NEXT: pavgusb %mm1, %mm0 +; CHECK-NEXT: pavgusb (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) 
nounwind readnone + +define i64 @test_pf2id(x86_mmx* %a0) optsize { +; CHECK-LABEL: test_pf2id: +; CHECK: # BB#0: +; CHECK-NEXT: pf2id (%rdi), %mm0 +; CHECK-NEXT: pf2id %mm0, %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone + +define i64 @test_pf2iw(x86_mmx* %a0) optsize { +; CHECK-LABEL: test_pf2iw: +; CHECK: # BB#0: +; CHECK-NEXT: pf2iw (%rdi), %mm0 +; CHECK-NEXT: pf2iw %mm0, %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone + +define i64 @test_pfacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfacc: +; CHECK: # BB#0: +; CHECK-NEXT: pfacc %mm1, %mm0 +; CHECK-NEXT: pfacc (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfadd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfadd: +; CHECK: # BB#0: +; CHECK-NEXT: pfadd %mm1, %mm0 +; CHECK-NEXT: pfadd (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx 
*%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfcmpeq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfcmpeq: +; CHECK: # BB#0: +; CHECK-NEXT: pfcmpeq %mm1, %mm0 +; CHECK-NEXT: pfcmpeq (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfcmpge(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfcmpge: +; CHECK: # BB#0: +; CHECK-NEXT: pfcmpge %mm1, %mm0 +; CHECK-NEXT: pfcmpge (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfcmpgt(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfcmpgt: +; CHECK: # BB#0: +; CHECK-NEXT: pfcmpgt %mm1, %mm0 +; CHECK-NEXT: pfcmpgt (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone + +define i64 
@test_pfmax(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfmax: +; CHECK: # BB#0: +; CHECK-NEXT: pfmax %mm1, %mm0 +; CHECK-NEXT: pfmax (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfmin(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfmin: +; CHECK: # BB#0: +; CHECK-NEXT: pfmin %mm1, %mm0 +; CHECK-NEXT: pfmin (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfmul(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfmul: +; CHECK: # BB#0: +; CHECK-NEXT: pfmul %mm1, %mm0 +; CHECK-NEXT: pfmul (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfnacc: +; CHECK: # BB#0: +; CHECK-NEXT: pfnacc %mm1, %mm0 +; CHECK-NEXT: pfnacc (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = 
call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfpnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfpnacc: +; CHECK: # BB#0: +; CHECK-NEXT: pfpnacc %mm1, %mm0 +; CHECK-NEXT: pfpnacc (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfrcp(x86_mmx* %a0) optsize { +; CHECK-LABEL: test_pfrcp: +; CHECK: # BB#0: +; CHECK-NEXT: pfrcp (%rdi), %mm0 +; CHECK-NEXT: pfrcp %mm0, %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone + +define i64 @test_pfrcpit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfrcpit1: +; CHECK: # BB#0: +; CHECK-NEXT: pfrcpit1 %mm1, %mm0 +; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind 
readnone + +define i64 @test_pfrcpit2(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfrcpit2: +; CHECK: # BB#0: +; CHECK-NEXT: pfrcpit2 %mm1, %mm0 +; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfrsqit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfrsqit1: +; CHECK: # BB#0: +; CHECK-NEXT: pfrsqit1 %mm1, %mm0 +; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfrsqrt(x86_mmx* %a0) optsize { +; CHECK-LABEL: test_pfrsqrt: +; CHECK: # BB#0: +; CHECK-NEXT: pfrsqrt (%rdi), %mm0 +; CHECK-NEXT: pfrsqrt %mm0, %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone + +define i64 @test_pfsub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfsub: +; CHECK: # BB#0: +; CHECK-NEXT: pfsub %mm1, %mm0 +; CHECK-NEXT: pfsub (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: 
[1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pfsubr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; CHECK-LABEL: test_pfsubr: +; CHECK: # BB#0: +; CHECK-NEXT: pfsubr %mm1, %mm0 +; CHECK-NEXT: pfsubr (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pi2fd(x86_mmx* %a0) optsize { +; CHECK-LABEL: test_pi2fd: +; CHECK: # BB#0: +; CHECK-NEXT: pi2fd (%rdi), %mm0 +; CHECK-NEXT: pi2fd %mm0, %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone + +define i64 @test_pi2fw(x86_mmx* %a0) optsize { +; CHECK-LABEL: test_pi2fw: +; CHECK: # BB#0: +; CHECK-NEXT: pi2fw (%rdi), %mm0 +; CHECK-NEXT: pi2fw %mm0, %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone + +define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) 
optsize { +; CHECK-LABEL: test_pmulhrw: +; CHECK: # BB#0: +; CHECK-NEXT: pmulhrw %mm1, %mm0 +; CHECK-NEXT: pmulhrw (%rdi), %mm0 +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pswapd(x86_mmx* %a0) optsize { +; CHECK-LABEL: test_pswapd: +; CHECK: # BB#0: +; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] +; CHECK-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] +; CHECK-NEXT: movd %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [1:1.00] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/aes-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/aes-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/aes-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/aes-schedule.ll 2017-10-17 14:41:14.000000000 +0000 @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s 
--check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -34,11 +35,17 @@ ; HASWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesdec: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesdec: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_aesdec: ; BTVER2: # BB#0: @@ -83,11 +90,17 @@ ; HASWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesdeclast: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesdeclast: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_aesdeclast: ; BTVER2: # BB#0: @@ -132,11 +145,17 @@ ; HASWELL-NEXT: vaesenc (%rdi), %xmm0, 
%xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesenc: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesenc: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_aesenc: ; BTVER2: # BB#0: @@ -181,11 +200,17 @@ ; HASWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesenclast: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesenclast: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_aesenclast: ; BTVER2: # BB#0: @@ -234,12 +259,19 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aesimc: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00] +; BROADWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aesimc: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaesimc %xmm0, %xmm0 # sched: [8:2.00] -; SKYLAKE-NEXT: vaesimc (%rdi), %xmm1 # sched: [8:2.00] -; 
SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00] +; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_aesimc: ; BTVER2: # BB#0: @@ -291,12 +323,19 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_aeskeygenassist: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00] +; BROADWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [28:7.00] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_aeskeygenassist: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [20:6.00] -; SKYLAKE-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [19:6.00] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [25:6.00] +; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_aeskeygenassist: ; BTVER2: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/avx2-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/avx2-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/avx2-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/avx2-schedule.ll 2017-10-17 14:41:14.000000000 +0000 @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s 
--check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -18,11 +19,17 @@ ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcasti128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcasti128: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] +; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_broadcasti128: ; SKX: # BB#0: @@ -54,11 +61,17 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastsd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastsd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] 
+; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_broadcastsd_ymm: ; SKX: # BB#0: @@ -89,11 +102,17 @@ ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_broadcastss: ; SKX: # BB#0: @@ -124,11 +143,17 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastss_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastss_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_broadcastss_ymm: ; SKX: # BB#0: @@ -165,14 +190,23 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_extracti128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_extracti128: ; SKYLAKE: # BB#0: 
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_extracti128: ; SKX: # BB#0: @@ -210,10 +244,15 @@ ; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherdpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherdpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherdpd: ; SKX: # BB#0: @@ -240,10 +279,15 @@ ; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherdpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherdpd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [20:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherdpd_ymm: ; SKX: # BB#0: @@ -270,10 +314,15 @@ ; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] 
; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherdps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherdps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherdps: ; SKX: # BB#0: @@ -300,10 +349,15 @@ ; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherdps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherdps_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [20:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherdps_ymm: ; SKX: # BB#0: @@ -330,10 +384,15 @@ ; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherqpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherqpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherqpd: ; SKX: # BB#0: @@ -360,10 +419,15 @@ ; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [1:?] 
; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherqpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherqpd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [20:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherqpd_ymm: ; SKX: # BB#0: @@ -390,10 +454,15 @@ ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherqps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherqps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherqps: ; SKX: # BB#0: @@ -422,11 +491,17 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_gatherqps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [1:?] 
+; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_gatherqps_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [20:1.00] +; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_gatherqps_ymm: ; SKX: # BB#0: @@ -459,12 +534,19 @@ ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_inserti128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_inserti128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_inserti128: ; SKX: # BB#0: @@ -499,10 +581,15 @@ ; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntdqa: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntdqa: ; 
SKX: # BB#0: @@ -531,11 +618,17 @@ ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [7:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mpsadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] +; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mpsadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] -; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [4:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mpsadbw: ; SKX: # BB#0: @@ -571,12 +664,19 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] +; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsb: ; SKX: # BB#0: @@ -614,12 +714,19 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %ymm1, 
%ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] +; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsd: ; SKX: # BB#0: @@ -657,12 +764,19 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] +; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsw: ; SKX: # BB#0: @@ -698,11 +812,17 @@ ; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packssdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: 
vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packssdw: ; SKX: # BB#0: @@ -736,11 +856,17 @@ ; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packsswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packsswb: ; SKX: # BB#0: @@ -774,11 +900,17 @@ ; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packusdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packusdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packusdw: ; SKX: # BB#0: @@ -812,11 +944,17 @@ ; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packuswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackuswb 
(%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packuswb: ; SKX: # BB#0: @@ -850,11 +988,17 @@ ; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddb: ; SKX: # BB#0: @@ -886,11 +1030,17 @@ ; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddd: ; SKX: # 
BB#0: @@ -922,11 +1072,17 @@ ; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddq: ; SKX: # BB#0: @@ -958,11 +1114,17 @@ ; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddsb: ; SKX: # BB#0: @@ -995,11 +1157,17 @@ ; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddsw 
%ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddsw: ; SKX: # BB#0: @@ -1032,11 +1200,17 @@ ; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddusb: ; SKX: # BB#0: @@ -1069,11 +1243,17 @@ ; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddusw: ; SKX: # BB#0: @@ -1106,11 +1286,17 @@ ; HASWELL-NEXT: vpaddw (%rdi), 
%ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddw: ; SKX: # BB#0: @@ -1142,11 +1328,17 @@ ; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_palignr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] +; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00] +; SKYLAKE-NEXT: retq 
# sched: [7:1.00] ; ; SKX-LABEL: test_palignr: ; SKX: # BB#0: @@ -1180,12 +1372,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pand: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pand: ; SKX: # BB#0: @@ -1222,12 +1421,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pandn: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pandn: ; SKX: # BB#0: @@ -1264,11 +1470,17 
@@ ; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pavgb: ; SKX: # BB#0: @@ -1310,11 +1522,17 @@ ; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pavgw: ; SKX: # BB#0: @@ -1358,12 +1576,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] +; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; 
SKYLAKE-LABEL: test_pblendd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] -; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] +; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pblendd: ; SKX: # BB#0: @@ -1400,12 +1625,19 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] +; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] -; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] +; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pblendd_ymm: ; SKX: # BB#0: @@ -1440,11 +1672,17 @@ ; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendvb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendvb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pblendvb: ; SKX: # BB#0: @@ -1477,11 +1715,17 @@ ; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] +; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] -; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pblendw: ; SKX: # BB#0: @@ -1515,12 +1759,19 @@ ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: 
retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00] +; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pbroadcastb: ; SKX: # BB#0: @@ -1557,12 +1808,19 @@ ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastb_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastb_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00] +; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pbroadcastb_ymm: ; SKX: # BB#0: @@ -1599,12 +1857,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] +; 
BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pbroadcastd: ; SKX: # BB#0: @@ -1640,12 +1905,19 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pbroadcastd_ymm: ; SKX: # BB#0: @@ -1681,12 +1953,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: 
[2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pbroadcastq: ; SKX: # BB#0: @@ -1722,12 +2001,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastq_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pbroadcastq_ymm: ; SKX: # BB#0: @@ -1763,12 +2049,19 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastw (%rdi), 
%xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00] +; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pbroadcastw: ; SKX: # BB#0: @@ -1805,12 +2098,19 @@ ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pbroadcastw_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pbroadcastw_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00] +; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pbroadcastw_ymm: ; SKX: # BB#0: @@ -1845,11 +2145,17 @@ ; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # 
sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqb: ; SKX: # BB#0: @@ -1885,11 +2191,17 @@ ; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqd: ; SKX: # BB#0: @@ -1925,11 +2237,17 @@ ; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqq: ; SKX: # BB#0: @@ -1965,11 +2283,17 @@ ; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # 
sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqw: ; SKX: # BB#0: @@ -2005,11 +2329,17 @@ ; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtb: ; SKX: # BB#0: @@ -2045,11 +2375,17 @@ ; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; 
; SKX-LABEL: test_pcmpgtd: ; SKX: # BB#0: @@ -2085,11 +2421,17 @@ ; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtq: ; SKX: # BB#0: @@ -2125,11 +2467,17 @@ ; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtw: ; SKX: # BB#0: @@ -2167,12 +2515,19 @@ ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_perm2i128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: 
retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_perm2i128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] -; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] +; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_perm2i128: ; SKX: # BB#0: @@ -2209,12 +2564,19 @@ ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permd: ; SKX: # BB#0: @@ -2252,12 +2614,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = 
ymm0[3,2,2,3] sched: [3:1.00] -; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permpd: ; SKX: # BB#0: @@ -2294,12 +2663,19 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permps: ; SKX: # BB#0: @@ -2337,12 +2713,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] +; 
SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permq: ; SKX: # BB#0: @@ -2375,10 +2758,15 @@ ; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherdd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherdd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherdd: ; SKX: # BB#0: @@ -2405,10 +2793,15 @@ ; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherdd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherdd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherdd_ymm: ; SKX: # BB#0: @@ -2435,10 +2828,15 @@ ; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] 
+; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherdq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherdq: ; SKX: # BB#0: @@ -2465,10 +2863,15 @@ ; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherdq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherdq_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [20:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherdq_ymm: ; SKX: # BB#0: @@ -2495,10 +2898,15 @@ ; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherqd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherqd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherqd: ; SKX: # BB#0: @@ -2527,11 +2935,17 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherqd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [1:?] 
+; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherqd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [20:1.00] +; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherqd_ymm: ; SKX: # BB#0: @@ -2560,10 +2974,15 @@ ; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherqq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherqq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherqq: ; SKX: # BB#0: @@ -2590,10 +3009,15 @@ ; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pgatherqq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] 
+; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pgatherqq_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pgatherqq_ymm: ; SKX: # BB#0: @@ -2622,11 +3046,17 @@ ; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddd: ; SKX: # BB#0: @@ -2659,11 +3089,17 @@ ; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddsw: ; SKX: # BB#0: @@ -2696,11 +3132,17 @@ ; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddw: +; 
BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddw: ; SKX: # BB#0: @@ -2733,11 +3175,17 @@ ; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubd: ; SKX: # BB#0: @@ -2770,11 +3218,17 @@ ; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubsw: ; 
SKX: # BB#0: @@ -2807,11 +3261,17 @@ ; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubw: ; SKX: # BB#0: @@ -2844,11 +3304,17 @@ ; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddubsw: ; SKX: # BB#0: @@ -2882,11 +3348,17 @@ ; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmaddwd (%rdi), 
%ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddwd: ; SKX: # BB#0: @@ -2922,12 +3394,19 @@ ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaskmovd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaskmovd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] +; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaskmovd: ; SKX: # BB#0: @@ -2964,12 +3443,19 @@ ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaskmovd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [2:2.00] +; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaskmovd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] +; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: 
retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaskmovd_ymm: ; SKX: # BB#0: @@ -3006,12 +3492,19 @@ ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaskmovq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaskmovq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] +; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaskmovq: ; SKX: # BB#0: @@ -3048,12 +3541,19 @@ ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaskmovq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [2:2.00] +; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaskmovq_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] +; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaskmovq_ymm: ; SKX: # BB#0: @@ -3088,11 +3588,17 @@ ; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # 
sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxsb: ; SKX: # BB#0: @@ -3125,11 +3631,17 @@ ; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxsd: ; SKX: # BB#0: @@ -3162,11 +3674,17 @@ ; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxsw 
(%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxsw: ; SKX: # BB#0: @@ -3199,11 +3717,17 @@ ; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxub: ; SKX: # BB#0: @@ -3236,11 +3760,17 @@ ; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxud: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxud: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxud: ; SKX: # BB#0: @@ -3273,11 +3803,17 @@ ; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; 
BROADWELL-LABEL: test_pmaxuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxuw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxuw: ; SKX: # BB#0: @@ -3310,11 +3846,17 @@ ; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminsb: ; SKX: # BB#0: @@ -3347,11 +3889,17 @@ ; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # 
sched: [2:1.00] +; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminsd: ; SKX: # BB#0: @@ -3384,11 +3932,17 @@ ; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminsw: ; SKX: # BB#0: @@ -3421,11 +3975,17 @@ ; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminub: ; SKX: # BB#0: @@ -3458,11 +4018,17 @@ ; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminud: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminud: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminud: ; SKX: # BB#0: @@ -3495,11 +4061,17 @@ ; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminuw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminuw: ; SKX: # BB#0: @@ -3532,11 +4104,17 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovmskb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovmskb: ; SKX: # BB#0: @@ -3569,12 +4147,19 @@ ; 
HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxbd: ; SKX: # BB#0: @@ -3613,12 +4198,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxbq: ; SKX: # BB#0: @@ -3657,12 +4249,19 @@ ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] +; 
BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] +; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxbw: ; SKX: # BB#0: @@ -3699,12 +4298,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxdq: ; SKX: # BB#0: @@ -3741,12 +4347,19 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwd: ; SKYLAKE: # BB#0: ; 
SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxwd: ; SKX: # BB#0: @@ -3783,12 +4396,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxwq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxwq: ; SKX: # BB#0: @@ -3827,12 +4447,19 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: 
vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxbd: ; SKX: # BB#0: @@ -3871,12 +4498,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxbq: ; SKX: # BB#0: @@ -3915,12 +4549,19 @@ ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] +; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxbw: ; SKX: # BB#0: @@ -3957,12 +4598,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] +; 
SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxdq: ; SKX: # BB#0: @@ -3999,12 +4647,19 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxwd: ; SKX: # BB#0: @@ -4041,12 +4696,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxwq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 
sched: [3:1.00] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxwq: ; SKX: # BB#0: @@ -4083,11 +4745,17 @@ ; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuldq: ; SKX: # BB#0: @@ -4121,11 +4789,17 @@ ; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhrsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhrsw: ; 
SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhrsw: ; SKX: # BB#0: @@ -4158,11 +4832,17 @@ ; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhuw: ; SKX: # BB#0: @@ -4195,11 +4875,17 @@ ; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhw: ; SKX: # BB#0: @@ -4232,11 +4918,17 @@ ; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] +; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67] -; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [8:0.67] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulld: ; SKX: # BB#0: @@ -4268,11 +4960,17 @@ ; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmullw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmullw: ; SKX: # BB#0: @@ -4304,11 +5002,17 @@ ; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuludq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuludq: ; SKX: # BB#0: @@ -4344,12 +5048,19 @@ 
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_por: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_por: ; SKX: # BB#0: @@ -4384,11 +5095,17 @@ ; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psadbw: ; SKX: # BB#0: @@ -4422,11 +5139,17 @@ ; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] 
+; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshufb: ; SKX: # BB#0: @@ -4461,12 +5184,19 @@ ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] +; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshufd: ; SKX: # BB#0: @@ -4503,12 +5233,19 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] +; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; 
SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] +; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshufhw: ; SKX: # BB#0: @@ -4545,12 +5282,19 @@ ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshuflw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] +; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00] +; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshuflw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] +; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshuflw: ; SKX: # BB#0: @@ -4585,11 +5329,17 @@ ; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: 
[1:1.00] -; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignb: ; SKX: # BB#0: @@ -4622,11 +5372,17 @@ ; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignd: ; SKX: # BB#0: @@ -4659,11 +5415,17 @@ ; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignw: ; SKX: # BB#0: @@ -4698,12 +5460,19 @@ ; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # 
sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pslld: ; SKX: # BB#0: @@ -4737,10 +5506,15 @@ ; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pslldq: ; SKX: # BB#0: @@ -4770,12 +5544,19 @@ ; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] +; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllq: ; SKX: # BB#0: @@ -4811,11 +5592,17 @@ ; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllvd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllvd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllvd: ; SKX: # BB#0: @@ -4848,11 +5635,17 @@ ; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllvd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllvd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # 
sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllvd_ymm: ; SKX: # BB#0: @@ -4885,11 +5678,17 @@ ; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllvq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllvq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllvq: ; SKX: # BB#0: @@ -4922,11 +5721,17 @@ ; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllvq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllvq_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllvq_ymm: ; SKX: # BB#0: @@ -4961,12 +5766,19 @@ ; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; 
BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllw: ; SKX: # BB#0: @@ -5004,12 +5816,19 @@ ; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrad: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrad: ; SKX: # BB#0: @@ -5045,11 +5864,17 @@ ; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psravd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psravd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] 
-; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psravd: ; SKX: # BB#0: @@ -5082,11 +5907,17 @@ ; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psravd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psravd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psravd_ymm: ; SKX: # BB#0: @@ -5121,12 +5952,19 @@ ; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psraw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psraw: ; SKX: # BB#0: @@ -5164,12 +6002,19 @@ ; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # 
sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrld: ; SKX: # BB#0: @@ -5203,10 +6048,15 @@ ; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrldq: ; SKX: # BB#0: @@ -5236,12 +6086,19 @@ ; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlq 
$2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlq: ; SKX: # BB#0: @@ -5277,11 +6134,17 @@ ; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlvd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlvd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlvd: ; SKX: # BB#0: @@ -5314,11 +6177,17 @@ ; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlvd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlvd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; 
SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlvd_ymm: ; SKX: # BB#0: @@ -5351,11 +6220,17 @@ ; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlvq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlvq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlvq: ; SKX: # BB#0: @@ -5388,11 +6263,17 @@ ; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlvq_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlvq_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlvq_ymm: ; SKX: # BB#0: @@ -5427,12 +6308,19 @@ ; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] +; BROADWELL-NEXT: vpsrlw 
(%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlw: ; SKX: # BB#0: @@ -5468,11 +6356,17 @@ ; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubb: ; SKX: # BB#0: @@ -5504,11 +6398,17 @@ ; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubd %ymm1, 
%ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubd: ; SKX: # BB#0: @@ -5540,11 +6440,17 @@ ; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubq: ; SKX: # BB#0: @@ -5576,11 +6482,17 @@ ; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubsb: ; SKX: # BB#0: @@ -5613,11 +6525,17 @@ ; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; 
BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubsw: ; SKX: # BB#0: @@ -5650,11 +6568,17 @@ ; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubusb: ; SKX: # BB#0: @@ -5687,11 +6611,17 @@ ; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: 
vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubusw: ; SKX: # BB#0: @@ -5724,11 +6654,17 @@ ; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubw: ; SKX: # BB#0: @@ -5760,11 +6696,17 @@ ; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = 
ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhbw: ; SKX: # BB#0: @@ -5800,13 +6742,21 @@ ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: 
[1:0.50] +; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhdq: ; SKX: # BB#0: @@ -5845,12 +6795,19 @@ ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] +; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhqdq: ; SKX: # BB#0: @@ -5885,11 +6842,17 @@ ; 
HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhwd: ; SKX: # BB#0: @@ -5921,11 +6884,17 @@ ; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpcklbw: ; SKX: # BB#0: @@ -5961,13 +6930,21 @@ ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] +; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckldq: ; SKX: # BB#0: @@ -6006,12 +6983,19 @@ ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq 
# sched: [2:1.00] +; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] +; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpcklqdq: ; SKX: # BB#0: @@ -6046,11 +7030,17 @@ ; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpcklwd: ; SKX: # BB#0: @@ -6084,12 +7074,19 @@ ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pxor: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpxor 
(%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pxor: ; SKX: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/avx-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/avx-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/avx-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/avx-schedule.ll 2017-10-17 14:41:14.000000000 +0000 @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s 
--check-prefix=CHECK --check-prefix=BTVER2 @@ -27,11 +28,17 @@ ; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addpd: ; SKX: # BB#0: @@ -75,11 +82,17 @@ ; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addps: ; SKX: # BB#0: @@ -123,11 +136,17 @@ ; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: 
[4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsubpd: ; SKX: # BB#0: @@ -172,11 +191,17 @@ ; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsubps: ; SKX: # BB#0: @@ -224,12 +249,19 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andnotpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotpd: ; SKX: # BB#0: @@ -286,12 +318,19 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: 
test_andnotps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotps: ; SKX: # BB#0: @@ -348,12 +387,19 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andpd: ; SKX: # BB#0: @@ -408,12 +454,19 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps 
%ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andps: ; SKX: # BB#0: @@ -468,12 +521,19 @@ ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] +; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendpd: ; SKX: # BB#0: @@ -521,11 +581,17 @@ ; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] +; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = 
ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] -; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] +; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendps: ; SKX: # BB#0: @@ -569,11 +635,17 @@ ; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendvpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendvpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendvpd: ; SKX: # BB#0: @@ -618,11 +690,17 @@ ; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendvps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendvps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), 
%ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendvps: ; SKX: # BB#0: @@ -664,10 +742,15 @@ ; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastf128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastf128: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_broadcastf128: ; SKX: # BB#0: @@ -704,10 +787,15 @@ ; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastsd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastsd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_broadcastsd_ymm: ; SKX: # BB#0: @@ -745,10 +833,15 @@ ; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastss: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # 
sched: [6:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_broadcastss: ; SKX: # BB#0: @@ -786,10 +879,15 @@ ; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_broadcastss_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_broadcastss_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_broadcastss_ymm: ; SKX: # BB#0: @@ -833,12 +931,19 @@ ; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmppd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmppd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33] -; SKYLAKE-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmppd: ; SKX: # BB#0: @@ -894,12 +999,19 @@ ; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] +; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33] -; SKYLAKE-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpps: ; SKX: # BB#0: @@ -955,12 +1067,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtdq2pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] +; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [6:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] -; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [7:1.00] +; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtdq2pd: ; SKX: # BB#0: @@ -1013,12 +1132,19 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtdq2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [4:0.50] +; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50] ; 
SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtdq2ps: ; SKX: # BB#0: @@ -1069,12 +1195,19 @@ ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpd2dq: ; SKX: # BB#0: @@ -1125,12 +1258,19 @@ ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpd2ps: ; SKX: # BB#0: @@ -1181,12 +1321,19 @@ ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtps2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttps2dq %ymm0, 
%ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtps2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [4:0.50] -; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50] +; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtps2dq: ; SKX: # BB#0: @@ -1234,11 +1381,17 @@ ; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:2.00] +; BROADWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:1.00] -; SKYLAKE-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_divpd: ; SKX: # BB#0: @@ -1282,11 +1435,17 @@ ; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [21:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:2.00] +; BROADWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [21:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:1.00] -; SKYLAKE-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdivps (%rdi), %ymm0, 
%ymm0 # sched: [18:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_divps: ; SKX: # BB#0: @@ -1330,11 +1489,17 @@ ; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_dpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] +; BROADWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_dpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33] -; SKYLAKE-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.33] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_dpps: ; SKX: # BB#0: @@ -1382,12 +1547,19 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_extractf128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_extractf128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_extractf128: ; SKX: # BB#0: @@ -1433,11 +1605,17 @@ ; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_haddpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_haddpd: ; SKYLAKE: # 
BB#0: ; SKYLAKE-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_haddpd: ; SKX: # BB#0: @@ -1482,11 +1660,17 @@ ; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_haddps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_haddps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_haddps: ; SKX: # BB#0: @@ -1531,11 +1715,17 @@ ; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_hsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_hsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_hsubpd: ; SKX: # BB#0: @@ -1580,11 +1770,17 @@ ; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_hsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # 
sched: [5:2.00] +; BROADWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_hsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_hsubps: ; SKX: # BB#0: @@ -1632,12 +1828,19 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_insertf128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_insertf128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_insertf128: ; SKX: # BB#0: @@ -1684,10 +1887,15 @@ ; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lddqu: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lddqu: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_lddqu: ; SKX: # BB#0: @@ -1730,12 +1938,19 @@ ; HASWELL-NEXT: vmovapd %xmm2, 
%xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; BROADWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] +; SKYLAKE-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maskmovpd: ; SKX: # BB#0: @@ -1786,12 +2001,19 @@ ; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [2:2.00] +; BROADWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovpd_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] +; SKYLAKE-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maskmovpd_ymm: ; SKX: # BB#0: @@ -1842,12 +2064,19 @@ ; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; BROADWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50] +; SKYLAKE-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maskmovps: ; SKX: # BB#0: @@ -1898,12 +2127,19 @@ ; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [2:2.00] +; BROADWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovps_ymm: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SKYLAKE-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50] +; SKYLAKE-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maskmovps_ymm: ; SKX: # BB#0: @@ -1951,11 +2187,17 @@ ; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: 
[2:1.00] +; ; SKYLAKE-LABEL: test_maxpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxpd: ; SKX: # BB#0: @@ -2000,11 +2242,17 @@ ; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxps: ; SKX: # BB#0: @@ -2049,11 +2297,17 @@ ; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minpd: ; SKX: # BB#0: @@ -2098,11 +2352,17 @@ ; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minps: ; SKX: # BB#0: @@ -2150,12 +2410,19 @@ ; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movapd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movapd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movapd: ; SKX: # BB#0: @@ -2205,12 +2472,19 @@ ; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movaps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movaps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovaps %ymm0, (%rsi) # sched: 
[1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movaps: ; SKX: # BB#0: @@ -2260,12 +2534,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movddup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] +; BROADWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movddup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50] +; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movddup: ; SKX: # BB#0: @@ -2313,11 +2594,17 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movmskpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movmskpd: ; SKX: # BB#0: @@ -2359,11 +2646,17 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movmskps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movmskps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: 
vmovmskps %ymm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movmskps: ; SKX: # BB#0: @@ -2405,11 +2698,17 @@ ; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntpd: ; SKX: # BB#0: @@ -2452,11 +2751,17 @@ ; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntps: ; SKX: # BB#0: @@ -2502,12 +2807,19 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movshdup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] +; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movshdup: ; SKYLAKE: # BB#0: ; 
SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50] +; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movshdup: ; SKX: # BB#0: @@ -2558,12 +2870,19 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movsldup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] +; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movsldup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50] +; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movsldup: ; SKX: # BB#0: @@ -2616,12 +2935,19 @@ ; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movupd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movupd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; 
SKYLAKE-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movupd: ; SKX: # BB#0: @@ -2673,12 +2999,19 @@ ; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movups: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movups: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movups: ; SKX: # BB#0: @@ -2725,11 +3058,17 @@ ; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulpd: ; SKX: # BB#0: @@ -2773,11 +3112,17 @@ ; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: 
[5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulps: ; SKX: # BB#0: @@ -2824,12 +3169,19 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: orpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: orpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: orpd: ; SKX: # BB#0: @@ -2884,12 +3236,19 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_orps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_orps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; 
SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_orps: ; SKX: # BB#0: @@ -2944,12 +3303,19 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_perm2f128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_perm2f128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_perm2f128: ; SKX: # BB#0: @@ -3000,12 +3366,19 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00] +; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permilpd: ; SKX: # BB#0: @@ -3056,12 
+3429,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00] +; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permilpd_ymm: ; SKX: # BB#0: @@ -3112,12 +3492,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permilps: ; SKX: # BB#0: @@ -3168,12 +3555,19 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00] +; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permilps_ymm: ; SKX: # BB#0: @@ -3221,11 +3615,17 @@ ; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilvarpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilvarpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permilvarpd: ; SKX: # BB#0: @@ -3270,11 +3670,17 @@ ; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilvarpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilvarpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; 
SKYLAKE-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permilvarpd_ymm: ; SKX: # BB#0: @@ -3319,11 +3725,17 @@ ; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilvarps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilvarps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permilvarps: ; SKX: # BB#0: @@ -3368,11 +3780,17 @@ ; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_permilvarps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_permilvarps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_permilvarps_ymm: ; SKX: # BB#0: @@ -3420,12 +3838,19 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rcpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: 
[11:2.00] +; BROADWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rcpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vrcpps (%rdi), %ymm1 # sched: [4:1.00] +; SKYLAKE-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00] ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rcpps: ; SKX: # BB#0: @@ -3477,12 +3902,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [5:1.25] +; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [6:2.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67] -; SKYLAKE-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:0.67] +; SKYLAKE-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:0.67] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundpd: ; SKX: # BB#0: @@ -3534,12 +3966,19 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [5:1.25] +; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [6:2.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67] -; SKYLAKE-NEXT: vroundps $7, (%rdi), %ymm1 # sched: 
[8:0.67] +; SKYLAKE-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:0.67] ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundps: ; SKX: # BB#0: @@ -3591,12 +4030,19 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rsqrtps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:2.00] +; BROADWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rsqrtps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [4:1.00] +; SKYLAKE-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00] ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rsqrtps: ; SKX: # BB#0: @@ -3648,12 +4094,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shufpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shufpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00] +; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] 
; ; SKX-LABEL: test_shufpd: ; SKX: # BB#0: @@ -3701,11 +4154,17 @@ ; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shufps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] +; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shufps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_shufps: ; SKX: # BB#0: @@ -3752,12 +4211,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:2.00] +; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:2.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:1.00] -; SKYLAKE-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [18:1.00] +; SKYLAKE-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:1.00] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtpd: ; SKX: # BB#0: @@ -3809,12 +4275,19 @@ ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # 
sched: [21:2.00] +; BROADWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00] +; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:1.00] -; SKYLAKE-NEXT: vsqrtps (%rdi), %ymm1 # sched: [12:1.00] +; SKYLAKE-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00] ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtps: ; SKX: # BB#0: @@ -3863,11 +4336,17 @@ ; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subpd: ; SKX: # BB#0: @@ -3911,11 +4390,17 @@ ; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: 
test_subps: ; SKX: # BB#0: @@ -3968,14 +4453,23 @@ ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_testpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_testpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:1.00] -; SKYLAKE-NEXT: vtestpd (%rdi), %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: setb %al # sched: [1:0.50] +; SKYLAKE-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] +; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_testpd: ; SKX: # BB#0: @@ -4042,15 +4536,25 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_testpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_testpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:1.00] -; SKYLAKE-NEXT: vtestpd (%rdi), %ymm0 # sched: [2:1.00] -; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:1.00] +; SKYLAKE-NEXT: setb %al # sched: [1:0.50] +; SKYLAKE-NEXT: vtestpd (%rdi), 
%ymm0 # sched: [9:1.00] +; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_testpd_ymm: ; SKX: # BB#0: @@ -4116,14 +4620,23 @@ ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_testps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_testps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:1.00] -; SKYLAKE-NEXT: vtestps (%rdi), %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: setb %al # sched: [1:0.50] +; SKYLAKE-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] +; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_testps: ; SKX: # BB#0: @@ -4190,15 +4703,25 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_testps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_testps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00] -; 
SKYLAKE-NEXT: setb %al # sched: [1:1.00] -; SKYLAKE-NEXT: vtestps (%rdi), %ymm0 # sched: [2:1.00] -; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:1.00] +; SKYLAKE-NEXT: setb %al # sched: [1:0.50] +; SKYLAKE-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00] +; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_testps_ymm: ; SKX: # BB#0: @@ -4258,12 +4781,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpckhpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpckhpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00] +; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpckhpd: ; SKX: # BB#0: @@ -4311,11 +4841,17 @@ ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpckhps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: 
test_unpckhps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpckhps: ; SKX: # BB#0: @@ -4362,12 +4898,19 @@ ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpcklpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00] +; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpcklpd: ; SKX: # BB#0: @@ -4415,11 +4958,17 @@ ; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpcklps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] +; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpcklps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpcklps: ; SKX: # BB#0: @@ -4466,12 +5015,19 @@ ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_xorpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_xorpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_xorpd: ; SKX: # BB#0: @@ -4526,12 +5082,19 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_xorps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_xorps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: 
[1:0.50] -; SKYLAKE-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_xorps: ; SKX: # BB#0: @@ -4580,10 +5143,15 @@ ; HASWELL-NEXT: vzeroall # sched: [16:16.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_zeroall: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vzeroall # sched: [16:16.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_zeroall: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vzeroall # sched: [16:4.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_zeroall: ; SKX: # BB#0: @@ -4620,10 +5188,15 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_zeroupper: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_zeroupper: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_zeroupper: ; SKX: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/bmi2-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/bmi2-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/bmi2-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/bmi2-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -1,8 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=COMMON --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=COMMON --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=COMMON --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) { @@ -13,12 +14,33 @@ ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; -; COMMON-LABEL: test_bzhi_i32: -; COMMON: # BB#0: -; COMMON-NEXT: bzhil %edi, (%rdx), %ecx # sched: [1:0.50] -; COMMON-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] -; COMMON-NEXT: addl %ecx, %eax # sched: [1:0.25] -; COMMON-NEXT: retq # sched: [2:1.00] +; HASWELL-LABEL: test_bzhi_i32: +; HASWELL: # BB#0: +; HASWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [1:0.50] +; HASWELL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] +; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_bzhi_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: bzhil %edi, %esi, %eax # sched: 
[1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_bzhi_i32: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50] +; SKYLAKE-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] +; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; KNL-LABEL: test_bzhi_i32: +; KNL: # BB#0: +; KNL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [1:0.50] +; KNL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] +; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; KNL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_bzhi_i32: ; ZNVER1: # BB#0: @@ -42,12 +64,33 @@ ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; -; COMMON-LABEL: test_bzhi_i64: -; COMMON: # BB#0: -; COMMON-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [1:0.50] -; COMMON-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] -; COMMON-NEXT: addq %rcx, %rax # sched: [1:0.25] -; COMMON-NEXT: retq # sched: [2:1.00] +; HASWELL-LABEL: test_bzhi_i64: +; HASWELL: # BB#0: +; HASWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [1:0.50] +; HASWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] +; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_bzhi_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_bzhi_i64: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50] +; SKYLAKE-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] +; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; KNL-LABEL: test_bzhi_i64: +; KNL: # BB#0: +; KNL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [1:0.50] +; KNL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] +; 
KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; KNL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_bzhi_i64: ; ZNVER1: # BB#0: @@ -75,14 +118,41 @@ ; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; -; COMMON-LABEL: test_mulx_i64: -; COMMON: # BB#0: -; COMMON-NEXT: movq %rdx, %rax # sched: [1:0.25] -; COMMON-NEXT: movq %rdi, %rdx # sched: [1:0.25] -; COMMON-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00] -; COMMON-NEXT: mulxq (%rax), %rdx, %rax # sched: [4:1.00] -; COMMON-NEXT: orq %rcx, %rax # sched: [1:0.25] -; COMMON-NEXT: retq # sched: [2:1.00] +; HASWELL-LABEL: test_mulx_i64: +; HASWELL: # BB#0: +; HASWELL-NEXT: movq %rdx, %rax # sched: [1:0.25] +; HASWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25] +; HASWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00] +; HASWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [4:1.00] +; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_mulx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movq %rdx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25] +; BROADWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [4:1.00] +; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_mulx_i64: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: movq %rdx, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: movq %rdi, %rdx # sched: [1:0.25] +; SKYLAKE-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00] +; SKYLAKE-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00] +; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; KNL-LABEL: test_mulx_i64: +; KNL: # BB#0: +; KNL-NEXT: movq %rdx, %rax # sched: [1:0.25] +; KNL-NEXT: movq %rdi, %rdx # sched: [1:0.25] +; KNL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00] +; KNL-NEXT: mulxq (%rax), %rdx, %rax # sched: [4:1.00] +; KNL-NEXT: orq %rcx, %rax # sched: 
[1:0.25] +; KNL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_mulx_i64: ; ZNVER1: # BB#0: @@ -114,12 +184,33 @@ ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; -; COMMON-LABEL: test_pdep_i32: -; COMMON: # BB#0: -; COMMON-NEXT: pdepl (%rdx), %edi, %ecx # sched: [3:1.00] -; COMMON-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] -; COMMON-NEXT: addl %ecx, %eax # sched: [1:0.25] -; COMMON-NEXT: retq # sched: [2:1.00] +; HASWELL-LABEL: test_pdep_i32: +; HASWELL: # BB#0: +; HASWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [3:1.00] +; HASWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] +; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pdep_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [3:1.00] +; BROADWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pdep_i32: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00] +; SKYLAKE-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; KNL-LABEL: test_pdep_i32: +; KNL: # BB#0: +; KNL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [3:1.00] +; KNL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] +; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; KNL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pdep_i32: ; ZNVER1: # BB#0: @@ -143,12 +234,33 @@ ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; -; COMMON-LABEL: test_pdep_i64: -; COMMON: # BB#0: -; COMMON-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [3:1.00] -; COMMON-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] -; COMMON-NEXT: addq %rcx, %rax # sched: [1:0.25] -; COMMON-NEXT: retq # sched: [2:1.00] +; HASWELL-LABEL: test_pdep_i64: +; HASWELL: # BB#0: +; HASWELL-NEXT: 
pdepq (%rdx), %rdi, %rcx # sched: [3:1.00] +; HASWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] +; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pdep_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [3:1.00] +; BROADWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pdep_i64: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00] +; SKYLAKE-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] +; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; KNL-LABEL: test_pdep_i64: +; KNL: # BB#0: +; KNL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [3:1.00] +; KNL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] +; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; KNL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pdep_i64: ; ZNVER1: # BB#0: @@ -172,12 +284,33 @@ ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; -; COMMON-LABEL: test_pext_i32: -; COMMON: # BB#0: -; COMMON-NEXT: pextl (%rdx), %edi, %ecx # sched: [3:1.00] -; COMMON-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] -; COMMON-NEXT: addl %ecx, %eax # sched: [1:0.25] -; COMMON-NEXT: retq # sched: [2:1.00] +; HASWELL-LABEL: test_pext_i32: +; HASWELL: # BB#0: +; HASWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [3:1.00] +; HASWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] +; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pext_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [3:1.00] +; BROADWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pext_i32: +; SKYLAKE: # BB#0: +; 
SKYLAKE-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00] +; SKYLAKE-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; KNL-LABEL: test_pext_i32: +; KNL: # BB#0: +; KNL-NEXT: pextl (%rdx), %edi, %ecx # sched: [3:1.00] +; KNL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] +; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; KNL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pext_i32: ; ZNVER1: # BB#0: @@ -201,12 +334,33 @@ ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; -; COMMON-LABEL: test_pext_i64: -; COMMON: # BB#0: -; COMMON-NEXT: pextq (%rdx), %rdi, %rcx # sched: [3:1.00] -; COMMON-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] -; COMMON-NEXT: addq %rcx, %rax # sched: [1:0.25] -; COMMON-NEXT: retq # sched: [2:1.00] +; HASWELL-LABEL: test_pext_i64: +; HASWELL: # BB#0: +; HASWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [3:1.00] +; HASWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] +; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pext_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [3:1.00] +; BROADWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pext_i64: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00] +; SKYLAKE-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] +; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; KNL-LABEL: test_pext_i64: +; KNL: # BB#0: +; KNL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [3:1.00] +; KNL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] +; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; KNL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pext_i64: ; ZNVER1: # BB#0: @@ -237,12 +391,19 @@ ; 
HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rorx_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rorx_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: rorxl $5, %edi, %ecx # sched: [1:1.00] -; SKYLAKE-NEXT: rorxl $5, (%rdx), %eax # sched: [1:0.50] +; SKYLAKE-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] +; SKYLAKE-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_rorx_i32: ; KNL: # BB#0: @@ -283,12 +444,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rorx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rorx_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: rorxq $5, %rdi, %rcx # sched: [1:1.00] -; SKYLAKE-NEXT: rorxq $5, (%rdx), %rax # sched: [1:0.50] +; SKYLAKE-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] +; SKYLAKE-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_rorx_i64: ; KNL: # BB#0: @@ -329,12 +497,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sarx_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: 
[1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sarx_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: sarxl %esi, %edi, %ecx # sched: [1:1.00] -; SKYLAKE-NEXT: sarxl %esi, (%rdx), %eax # sched: [1:0.50] +; SKYLAKE-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] +; SKYLAKE-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_sarx_i32: ; KNL: # BB#0: @@ -371,12 +546,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sarx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sarx_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:1.00] -; SKYLAKE-NEXT: sarxq %rsi, (%rdx), %rax # sched: [1:0.50] +; SKYLAKE-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] +; SKYLAKE-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_sarx_i64: ; KNL: # BB#0: @@ -413,12 +595,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shlx_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shlx_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: shlxl %esi, %edi, %ecx # sched: [1:1.00] -; SKYLAKE-NEXT: shlxl %esi, (%rdx), %eax # sched: [1:0.50] +; SKYLAKE-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] +; 
SKYLAKE-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_shlx_i32: ; KNL: # BB#0: @@ -455,12 +644,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shlx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shlx_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:1.00] -; SKYLAKE-NEXT: shlxq %rsi, (%rdx), %rax # sched: [1:0.50] +; SKYLAKE-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] +; SKYLAKE-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_shlx_i64: ; KNL: # BB#0: @@ -497,12 +693,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shrx_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shrx_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: shrxl %esi, %edi, %ecx # sched: [1:1.00] -; SKYLAKE-NEXT: shrxl %esi, (%rdx), %eax # sched: [1:0.50] +; SKYLAKE-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] +; SKYLAKE-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_shrx_i32: ; KNL: # BB#0: @@ -539,12 +742,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: 
[1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shrx_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shrx_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:1.00] -; SKYLAKE-NEXT: shrxq %rsi, (%rdx), %rax # sched: [1:0.50] +; SKYLAKE-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] +; SKYLAKE-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_shrx_i64: ; KNL: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/bmi-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/bmi-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/bmi-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/bmi-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -25,14 +26,23 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andn_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: andnl %esi, %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: notl %edi # sched: [1:0.25] +; BROADWELL-NEXT: andw (%rdx), %di # sched: [1:0.50] +; BROADWELL-NEXT: addl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andn_i16: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: andnl %esi, %edi, %eax # sched: [1:0.50] ; SKYLAKE-NEXT: notl %edi # sched: [1:0.25] -; SKYLAKE-NEXT: andw (%rdx), %di # sched: [1:0.50] +; SKYLAKE-NEXT: andw (%rdx), %di # sched: [6:0.50] ; SKYLAKE-NEXT: addl %edi, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: # kill: %AX %AX %EAX -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andn_i16: ; BTVER2: # BB#0: @@ -74,12 +84,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andn_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] +; BROADWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andn_i32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] -; SKYLAKE-NEXT: andnl (%rdx), %edi, %eax # sched: [1:0.50] +; SKYLAKE-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andn_i32: ; BTVER2: # BB#0: @@ -117,12 +134,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andn_i64: +; 
BROADWELL: # BB#0: +; BROADWELL-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] +; BROADWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andn_i64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] -; SKYLAKE-NEXT: andnq (%rdx), %rdi, %rax # sched: [1:0.50] +; SKYLAKE-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andn_i64: ; BTVER2: # BB#0: @@ -160,12 +184,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_bextr_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [2:0.50] +; BROADWELL-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_bextr_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: bextrl %edi, (%rdx), %ecx # sched: [2:0.50] +; SKYLAKE-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50] ; SKYLAKE-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_bextr_i32: ; BTVER2: # BB#0: @@ -203,12 +234,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_bextr_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [2:0.50] +; BROADWELL-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_bextr_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [2:0.50] +; SKYLAKE-NEXT: bextrq %rdi, (%rdx), 
%rcx # sched: [7:0.50] ; SKYLAKE-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_bextr_i64: ; BTVER2: # BB#0: @@ -246,12 +284,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsi_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsil (%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsil %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsi_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: blsil (%rsi), %ecx # sched: [1:0.50] +; SKYLAKE-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] ; SKYLAKE-NEXT: blsil %edi, %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blsi_i32: ; BTVER2: # BB#0: @@ -290,12 +335,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsi_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsiq (%rsi), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: blsiq %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsi_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: blsiq (%rsi), %rcx # sched: [1:0.50] +; SKYLAKE-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] ; SKYLAKE-NEXT: blsiq %rdi, %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blsi_i64: ; BTVER2: # BB#0: @@ -334,12 +386,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsmsk_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsmskl 
(%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsmskl %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsmsk_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: blsmskl (%rsi), %ecx # sched: [1:0.50] +; SKYLAKE-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] ; SKYLAKE-NEXT: blsmskl %edi, %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blsmsk_i32: ; BTVER2: # BB#0: @@ -378,12 +437,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsmsk_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsmskq (%rsi), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: blsmskq %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsmsk_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: blsmskq (%rsi), %rcx # sched: [1:0.50] +; SKYLAKE-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] ; SKYLAKE-NEXT: blsmskq %rdi, %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blsmsk_i64: ; BTVER2: # BB#0: @@ -422,12 +488,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsr_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsrl (%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsrl %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsr_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: blsrl (%rsi), %ecx # sched: [1:0.50] +; SKYLAKE-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] ; SKYLAKE-NEXT: blsrl %edi, %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: 
[1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blsr_i32: ; BTVER2: # BB#0: @@ -466,12 +539,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blsr_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: blsrq (%rsi), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: blsrq %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blsr_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: blsrq (%rsi), %rcx # sched: [1:0.50] +; SKYLAKE-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] ; SKYLAKE-NEXT: blsrq %rdi, %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blsr_i64: ; BTVER2: # BB#0: @@ -512,13 +592,21 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cttz_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: tzcntw (%rsi), %cx # sched: [3:1.00] +; BROADWELL-NEXT: tzcntw %di, %ax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cttz_i16: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: tzcntw (%rsi), %cx # sched: [3:1.00] +; SKYLAKE-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00] ; SKYLAKE-NEXT: tzcntw %di, %ax # sched: [3:1.00] ; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: # kill: %AX %AX %EAX -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cttz_i16: ; BTVER2: # BB#0: @@ -558,12 +646,19 @@ ; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cttz_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: tzcntl (%rsi), %ecx # sched: [3:1.00] +; BROADWELL-NEXT: tzcntl %edi, 
%eax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cttz_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: tzcntl (%rsi), %ecx # sched: [3:1.00] +; SKYLAKE-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00] ; SKYLAKE-NEXT: tzcntl %edi, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cttz_i32: ; BTVER2: # BB#0: @@ -601,12 +696,19 @@ ; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cttz_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: tzcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cttz_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: tzcntq (%rsi), %rcx # sched: [3:1.00] +; SKYLAKE-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00] ; SKYLAKE-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] ; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cttz_i64: ; BTVER2: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/f16c-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/f16c-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/f16c-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/f16c-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY ; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -28,12 +29,19 @@ ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vcvtph2ps_128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vcvtph2ps_128: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [4:0.50] +; SKYLAKE-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [9:0.50] ; SKYLAKE-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00] ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_vcvtph2ps_128: ; BTVER2: # BB#0: @@ -78,12 +86,19 @@ ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vcvtph2ps_256: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [1:1.00] +; BROADWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00] +; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vcvtph2ps_256: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: 
vcvtph2ps (%rdi), %ymm1 # sched: [4:0.50] +; SKYLAKE-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [10:0.50] ; SKYLAKE-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [7:1.00] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_vcvtph2ps_256: ; BTVER2: # BB#0: @@ -125,11 +140,17 @@ ; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vcvtps2ph_128: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vcvtps2ph_128: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_vcvtps2ph_128: ; BTVER2: # BB#0: @@ -172,12 +193,19 @@ ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vcvtps2ph_256: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [6:1.00] +; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vcvtps2ph_256: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] +; SKYLAKE-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00] ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_vcvtps2ph_256: ; BTVER2: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/fma-schedule.ll 
llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/fma-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/fma-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/fma-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX @@ -27,11 +28,17 @@ ; HASWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmadd213pd: ; KNL: # BB#0: @@ -69,11 +76,17 @@ ; 
HASWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213pd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213pd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmadd213pd_ymm: ; KNL: # BB#0: @@ -111,11 +124,17 @@ ; HASWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmadd213ps: ; KNL: # BB#0: @@ -153,11 +172,17 @@ ; HASWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213ps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213ps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: 
[4:0.50] -; SKYLAKE-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmadd213ps_ymm: ; KNL: # BB#0: @@ -195,11 +220,17 @@ ; HASWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmadd213sd: ; KNL: # BB#0: @@ -237,11 +268,17 @@ ; HASWELL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmadd213ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmadd213ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmadd213ss: ; KNL: # BB#0: @@ -291,11 +328,17 @@ ; HASWELL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmaddsubpd: +; BROADWELL: # BB#0: +; 
BROADWELL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmaddsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmaddsubpd: ; KNL: # BB#0: @@ -333,11 +376,17 @@ ; HASWELL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmaddsubpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmaddsubpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmaddsubpd_ymm: ; KNL: # BB#0: @@ -375,11 +424,17 @@ ; HASWELL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmaddsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmaddsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; 
SKYLAKE-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmaddsubps: ; KNL: # BB#0: @@ -417,11 +472,17 @@ ; HASWELL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmaddsubps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmaddsubps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmaddsubps_ymm: ; KNL: # BB#0: @@ -471,11 +532,17 @@ ; HASWELL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsubaddpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsubaddpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsubaddpd: ; KNL: # BB#0: @@ -513,11 +580,17 @@ ; HASWELL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsubaddpd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; 
BROADWELL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsubaddpd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsubaddpd_ymm: ; KNL: # BB#0: @@ -555,11 +628,17 @@ ; HASWELL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsubaddps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsubaddps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsubaddps: ; KNL: # BB#0: @@ -597,11 +676,17 @@ ; HASWELL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsubaddps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsubaddps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; 
SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsubaddps_ymm: ; KNL: # BB#0: @@ -651,11 +736,17 @@ ; HASWELL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsub213pd: ; KNL: # BB#0: @@ -693,11 +784,17 @@ ; HASWELL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213pd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213pd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsub213pd_ymm: ; KNL: # BB#0: @@ -735,11 +832,17 @@ ; HASWELL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; 
SKYLAKE-LABEL: test_vfmsub213ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsub213ps: ; KNL: # BB#0: @@ -777,11 +880,17 @@ ; HASWELL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213ps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213ps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsub213ps_ymm: ; KNL: # BB#0: @@ -819,11 +928,17 @@ ; HASWELL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsub213sd: ; KNL: # BB#0: @@ -861,11 +976,17 @@ ; HASWELL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: 
[5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfmsub213ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfmsub213ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfmsub213ss: ; KNL: # BB#0: @@ -915,11 +1036,17 @@ ; HASWELL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmadd213pd: ; KNL: # BB#0: @@ -957,11 +1084,17 @@ ; HASWELL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213pd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213pd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213pd (%rdi), 
%ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmadd213pd_ymm: ; KNL: # BB#0: @@ -999,11 +1132,17 @@ ; HASWELL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmadd213ps: ; KNL: # BB#0: @@ -1041,11 +1180,17 @@ ; HASWELL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213ps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213ps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmadd213ps_ymm: ; KNL: # BB#0: @@ -1083,11 +1228,17 @@ ; HASWELL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmadd213sd: ; KNL: # BB#0: @@ -1125,11 +1276,17 @@ ; HASWELL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmadd213ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmadd213ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmadd213ss: ; KNL: # BB#0: @@ -1179,11 +1336,17 @@ ; HASWELL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: 
[10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmsub213pd: ; KNL: # BB#0: @@ -1221,11 +1384,17 @@ ; HASWELL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213pd_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213pd_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmsub213pd_ymm: ; KNL: # BB#0: @@ -1263,11 +1432,17 @@ ; HASWELL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmsub213ps: ; KNL: # BB#0: @@ -1305,11 +1480,17 @@ ; HASWELL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213ps_ymm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] +; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213ps_ymm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmsub213ps_ymm: ; KNL: # BB#0: @@ -1347,11 +1528,17 @@ ; HASWELL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmsub213sd: ; KNL: # BB#0: @@ -1389,11 +1576,17 @@ ; HASWELL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_vfnmsub213ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_vfnmsub213ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; KNL-LABEL: test_vfnmsub213ss: ; KNL: # BB#0: diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/GlobalISel/select-blsi.mir llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/GlobalISel/select-blsi.mir --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/GlobalISel/select-blsi.mir 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/GlobalISel/select-blsi.mir 2017-10-17 14:41:15.000000000 +0000 @@ -0,0 +1,61 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+bmi -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Test that rules where multiple operands must be the same operand successfully +# match. Also test that the rules do not match when they're not the same +# operand. +# +# This test covers the case when OtherInsnID and OtherOpIdx are different in a +# GIM_CheckIsSameOperand. + +--- +name: test_blsi32rr +# CHECK-LABEL: name: test_blsi32rr +alignment: 4 +legalized: true +regBankSelected: true +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } +# G_SUB and G_AND both use %0 so we should match this. +# CHECK: %3 = BLSI32rr %0 +body: | + bb.1: + liveins: %edi + + %0(s32) = COPY %edi + %1(s32) = G_CONSTANT i32 0 + %2(s32) = G_SUB %1, %0 + %3(s32) = G_AND %2, %0 + %edi = COPY %3 + +... +--- +name: test_blsi32rr_nomatch +# CHECK-LABEL: name: test_blsi32rr_nomatch +alignment: 4 +legalized: true +regBankSelected: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } +# G_SUB and G_AND use different operands so we shouldn't match this. 
+# CHECK-NOT: BLSI32rr +body: | + bb.1: + liveins: %edi + + %0(s32) = COPY %edi + %1(s32) = G_CONSTANT i32 0 + %2(s32) = G_SUB %1, %1 + %3(s32) = G_AND %2, %0 + %edi = COPY %3 +... diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/implicit-null-check.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/implicit-null-check.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/implicit-null-check.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/implicit-null-check.ll 2017-10-17 14:41:15.000000000 +0000 @@ -182,6 +182,28 @@ ret void } +define i32 @imp_null_check_neg_gep_load(i32* %x) { +; CHECK-LABEL: _imp_null_check_neg_gep_load: +; CHECK: [[BB0_imp_null_check_neg_gep_load:L[^:]+]]: +; CHECK: movl -128(%rdi), %eax +; CHECK: retq +; CHECK: [[BB1_imp_null_check_neg_gep_load:LBB7_[0-9]+]]: +; CHECK: movl $42, %eax +; CHECK: retq + + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %x.gep = getelementptr i32, i32* %x, i32 -32 + %t = load i32, i32* %x.gep + ret i32 %t +} + !0 = !{} ; CHECK-LABEL: __LLVM_FaultMaps: @@ -194,7 +216,7 @@ ; CHECK-NEXT: .short 0 ; # functions: -; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long 8 ; FunctionAddr: ; CHECK-NEXT: .quad _imp_null_check_add_result @@ -262,6 +284,19 @@ ; CHECK-NEXT: .long [[BB1_imp_null_check_load]]-_imp_null_check_load ; FunctionAddr: +; CHECK-NEXT: .quad _imp_null_check_neg_gep_load +; NumFaultingPCs +; CHECK-NEXT: .long 1 +; Reserved: +; CHECK-NEXT: .long 0 +; Fault[0].Type: +; CHECK-NEXT: .long 1 +; Fault[0].FaultOffset: +; CHECK-NEXT: .long [[BB0_imp_null_check_neg_gep_load]]-_imp_null_check_neg_gep_load +; Fault[0].HandlerOffset: +; CHECK-NEXT: .long [[BB1_imp_null_check_neg_gep_load]]-_imp_null_check_neg_gep_load + +; FunctionAddr: ; CHECK-NEXT: .quad _imp_null_check_store ; NumFaultingPCs ; CHECK-NEXT: .long 1 @@ -289,7 +324,7 @@ ; OBJDUMP: FaultMap 
table: ; OBJDUMP-NEXT: Version: 0x1 -; OBJDUMP-NEXT: NumFunctions: 7 +; OBJDUMP-NEXT: NumFunctions: 8 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 5 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 @@ -301,6 +336,8 @@ ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 3 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 +; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 4 +; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingStore, faulting PC offset: 0, handling PC offset: 7 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 11 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/implicit-null-check-negative.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/implicit-null-check-negative.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/implicit-null-check-negative.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/implicit-null-check-negative.ll 2017-10-17 14:41:15.000000000 +0000 @@ -37,6 +37,22 @@ ret i32 %t } +define i32 @imp_null_check_neg_gep_load(i32* %x) { + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: +; null - 5000 * sizeof(i32) lies outside the null page and hence the +; load to %t cannot be assumed to be reliably faulting. + %x.gep = getelementptr i32, i32* %x, i32 -5000 + %t = load i32, i32* %x.gep + ret i32 %t +} + define i32 @imp_null_check_load_no_md(i32* %x) { ; This is fine, except it is missing the !make.implicit metadata. 
entry: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/lea32-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/lea32-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/lea32-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/lea32-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -47,11 +48,17 @@ ; HASWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; SKYLAKE-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_offset: ; BTVER2: # BB#0: @@ -105,11 +112,17 @@ ; 
HASWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; SKYLAKE-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_offset_big: ; BTVER2: # BB#0: @@ -169,12 +182,19 @@ ; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add: ; BTVER2: # BB#0: @@ -238,13 +258,21 @@ ; HASWELL-NEXT: addl $16, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $16, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl $16, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # 
sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_offset: ; BTVER2: # BB#0: @@ -312,6 +340,15 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $-4096, %eax # imm = 0xF000 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI @@ -319,7 +356,7 @@ ; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl $-4096, %eax # imm = 0xF000 ; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_offset_big: ; BTVER2: # BB#0: @@ -376,11 +413,17 @@ ; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_mul: ; BTVER2: # BB#0: @@ -437,12 +480,19 @@ ; HASWELL-NEXT: addl $-32, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $-32, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; 
SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl $-32, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_mul_offset: ; BTVER2: # BB#0: @@ -503,13 +553,21 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $10000, %eax # imm = 0x2710 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; SKYLAKE-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl $10000, %eax # imm = 0x2710 ; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_mul_offset_big: ; BTVER2: # BB#0: @@ -569,12 +627,19 @@ ; HASWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; SKYLAKE-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_scale: ; BTVER2: # BB#0: @@ -639,13 +704,21 @@ ; HASWELL-NEXT: addl $96, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI 
+; BROADWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $96, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI ; SKYLAKE-NEXT: # kill: %EDI %EDI %RDI ; SKYLAKE-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl $96, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_scale_offset: ; BTVER2: # BB#0: @@ -714,6 +787,15 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: # kill: %ESI %ESI %RSI +; BROADWELL-NEXT: # kill: %EDI %EDI %RDI +; BROADWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] +; BROADWELL-NEXT: addl $-1200, %eax # imm = 0xFB50 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: # kill: %ESI %ESI %RSI @@ -721,7 +803,7 @@ ; SKYLAKE-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: addl $-1200, %eax # imm = 0xFB50 ; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_scale_offset_big: ; BTVER2: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/lea64-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/lea64-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/lea64-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/lea64-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -42,10 +43,15 @@ ; HASWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_offset: ; BTVER2: # BB#0: @@ -92,10 +98,15 @@ ; HASWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_offset_big: ; BTVER2: # BB#0: @@ -143,10 +154,15 @@ ; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add: +; BROADWELL: # 
BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add: ; BTVER2: # BB#0: @@ -196,11 +212,17 @@ ; HASWELL-NEXT: addq $16, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $16, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq $16, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_offset: ; BTVER2: # BB#0: @@ -254,12 +276,19 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $-4096, %rax # imm = 0xF000 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq $-4096, %rax # imm = 0xF000 ; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_offset_big: ; BTVER2: # BB#0: @@ -307,10 +336,15 @@ ; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul: ; 
SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_mul: ; BTVER2: # BB#0: @@ -360,11 +394,17 @@ ; HASWELL-NEXT: addq $-32, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $-32, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq $-32, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_mul_offset: ; BTVER2: # BB#0: @@ -418,12 +458,19 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_mul_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $10000, %rax # imm = 0x2710 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_mul_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq $10000, %rax # imm = 0x2710 ; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_mul_offset_big: ; BTVER2: # BB#0: @@ -471,10 +518,15 @@ ; HASWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; 
SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_scale: ; BTVER2: # BB#0: @@ -525,11 +577,17 @@ ; HASWELL-NEXT: addq $96, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale_offset: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $96, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale_offset: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq $96, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_scale_offset: ; BTVER2: # BB#0: @@ -584,12 +642,19 @@ ; HASWELL-NEXT: # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lea_add_scale_offset_big: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] +; BROADWELL-NEXT: addq $-1200, %rax # imm = 0xFB50 +; BROADWELL-NEXT: # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lea_add_scale_offset_big: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: addq $-1200, %rax # imm = 0xFB50 ; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lea_add_scale_offset_big: ; BTVER2: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/lzcnt-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/lzcnt-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/lzcnt-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/lzcnt-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -1,10 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) { ; GENERIC-LABEL: test_ctlz_i16: @@ -23,13 +24,21 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctlz_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: lzcntw (%rsi), %cx # sched: [3:1.00] +; BROADWELL-NEXT: lzcntw %di, %ax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # 
sched: [1:0.25] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctlz_i16: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: lzcntw (%rsi), %cx # sched: [3:1.00] +; SKYLAKE-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00] ; SKYLAKE-NEXT: lzcntw %di, %ax # sched: [3:1.00] ; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: # kill: %AX %AX %EAX -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ctlz_i16: ; BTVER2: # BB#0: @@ -69,12 +78,19 @@ ; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctlz_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: lzcntl (%rsi), %ecx # sched: [3:1.00] +; BROADWELL-NEXT: lzcntl %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctlz_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: lzcntl (%rsi), %ecx # sched: [3:1.00] +; SKYLAKE-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00] ; SKYLAKE-NEXT: lzcntl %edi, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ctlz_i32: ; BTVER2: # BB#0: @@ -112,12 +128,19 @@ ; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctlz_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: lzcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctlz_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: lzcntq (%rsi), %rcx # sched: [3:1.00] +; SKYLAKE-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00] ; SKYLAKE-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] ; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # 
sched: [7:1.00] ; ; BTVER2-LABEL: test_ctlz_i64: ; BTVER2: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/mmx-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/mmx-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/mmx-schedule.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/mmx-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -0,0 +1,6967 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | 
FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 + +define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { +; GENERIC-LABEL: test_cvtpd2pi: +; GENERIC: # BB#0: +; GENERIC-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] +; GENERIC-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] +; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_cvtpd2pi: +; ATOM: # BB#0: +; ATOM-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [8:4.00] +; ATOM-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [7:3.50] +; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_cvtpd2pi: +; SLM: # BB#0: +; SLM-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [7:1.00] +; SLM-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:0.50] +; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SLM-NEXT: movd %mm1, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_cvtpd2pi: +; SANDY: # BB#0: +; SANDY-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] +; SANDY-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] +; SANDY-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_cvtpd2pi: +; HASWELL: # BB#0: +; HASWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [4:1.00] +; HASWELL-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] +; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_cvtpd2pi: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] +; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_cvtpd2pi: +; SKYLAKE: # BB#0: 
+; SKYLAKE-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [5:1.00] +; SKYLAKE-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00] +; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_cvtpd2pi: +; SKX: # BB#0: +; SKX-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [5:1.00] +; SKX-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00] +; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKX-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_cvtpd2pi: +; BTVER2: # BB#0: +; BTVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [8:1.00] +; BTVER2-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [3:1.00] +; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm1, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_cvtpd2pi: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [12:1.00] +; ZNVER1-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm1, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) + %2 = load <2 x double>, <2 x double> *%a1, align 16 + %3 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %2) + %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone + +define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize { +; GENERIC-LABEL: test_cvtpi2pd: +; GENERIC: # BB#0: +; GENERIC-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:1.00] +; GENERIC-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_cvtpi2pd: +; ATOM: # BB#0: +; ATOM-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [8:4.00] +; ATOM-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [7:3.50] 
+; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_cvtpi2pd: +; SLM: # BB#0: +; SLM-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [7:1.00] +; SLM-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:0.50] +; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_cvtpi2pd: +; SANDY: # BB#0: +; SANDY-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_cvtpi2pd: +; HASWELL: # BB#0: +; HASWELL-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] +; HASWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [4:1.00] +; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_cvtpi2pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_cvtpi2pd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00] +; SKYLAKE-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] +; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_cvtpi2pd: +; SKX: # BB#0: +; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_cvtpi2pd: +; BTVER2: # BB#0: +; BTVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00] +; BTVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_cvtpi2pd: +; ZNVER1: # BB#0: +; 
ZNVER1-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [12:1.00] +; ZNVER1-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [3:1.00] +; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) + %2 = load x86_mmx, x86_mmx *%a1, align 8 + %3 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %2) + %4 = fadd <2 x double> %1, %3 + ret <2 x double> %4 +} +declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone + +define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 x float> %a3) optsize { +; GENERIC-LABEL: test_cvtpi2ps: +; GENERIC: # BB#0: +; GENERIC-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_cvtpi2ps: +; ATOM: # BB#0: +; ATOM-NEXT: cvtpi2ps (%rdi), %xmm1 +; ATOM-NEXT: cvtpi2ps %mm0, %xmm0 +; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_cvtpi2ps: +; SLM: # BB#0: +; SLM-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [7:1.00] +; SLM-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [4:0.50] +; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_cvtpi2ps: +; SANDY: # BB#0: +; SANDY-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_cvtpi2ps: +; HASWELL: # BB#0: +; HASWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [3:1.00] +; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_cvtpi2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: 
cvtpi2ps (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_cvtpi2ps: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] +; SKYLAKE-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] +; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_cvtpi2ps: +; SKX: # BB#0: +; SKX-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] +; SKX-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_cvtpi2ps: +; BTVER2: # BB#0: +; BTVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00] +; BTVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_cvtpi2ps: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [12:1.00] +; ZNVER1-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [5:1.00] +; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a2, x86_mmx %a0) + %2 = load x86_mmx, x86_mmx *%a1, align 8 + %3 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a3, x86_mmx %2) + %4 = fadd <4 x float> %1, %3 + ret <4 x float> %4 +} +declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone + +define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { +; GENERIC-LABEL: test_cvtps2pi: +; GENERIC: # BB#0: +; GENERIC-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] +; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm1, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_cvtps2pi: +; ATOM: # BB#0: +; ATOM-NEXT: cvtps2pi %xmm0, 
%mm0 # sched: [5:5.00] +; ATOM-NEXT: cvtps2pi (%rdi), %mm1 # sched: [5:5.00] +; ATOM-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; ATOM-NEXT: movd %mm1, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_cvtps2pi: +; SLM: # BB#0: +; SLM-NEXT: cvtps2pi (%rdi), %mm1 # sched: [7:1.00] +; SLM-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:0.50] +; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SLM-NEXT: movd %mm1, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_cvtps2pi: +; SANDY: # BB#0: +; SANDY-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] +; SANDY-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] +; SANDY-NEXT: por %mm0, %mm1 # sched: [1:1.00] +; SANDY-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_cvtps2pi: +; HASWELL: # BB#0: +; HASWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] +; HASWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [3:1.00] +; HASWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] +; HASWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_cvtps2pi: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [3:1.00] +; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_cvtps2pi: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00] +; SKYLAKE-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:0.50] +; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_cvtps2pi: +; SKX: # BB#0: +; SKX-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00] +; SKX-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:0.50] +; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKX-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKX-NEXT: retq # 
sched: [7:1.00] +; +; BTVER2-LABEL: test_cvtps2pi: +; BTVER2: # BB#0: +; BTVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] +; BTVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] +; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm1, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_cvtps2pi: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: cvtps2pi (%rdi), %mm1 # sched: [12:1.00] +; ZNVER1-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm1, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) + %2 = load <4 x float>, <4 x float> *%a1, align 16 + %3 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %2) + %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone + +define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { +; GENERIC-LABEL: test_cvttpd2pi: +; GENERIC: # BB#0: +; GENERIC-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] +; GENERIC-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] +; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_cvttpd2pi: +; ATOM: # BB#0: +; ATOM-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [8:4.00] +; ATOM-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [7:3.50] +; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_cvttpd2pi: +; SLM: # BB#0: +; SLM-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [7:1.00] +; SLM-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:0.50] +; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SLM-NEXT: movd %mm1, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_cvttpd2pi: +; SANDY: # BB#0: +; 
SANDY-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] +; SANDY-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] +; SANDY-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_cvttpd2pi: +; HASWELL: # BB#0: +; HASWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [4:1.00] +; HASWELL-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] +; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_cvttpd2pi: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] +; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_cvttpd2pi: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [5:1.00] +; SKYLAKE-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00] +; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_cvttpd2pi: +; SKX: # BB#0: +; SKX-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [5:1.00] +; SKX-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00] +; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKX-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_cvttpd2pi: +; BTVER2: # BB#0: +; BTVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [8:1.00] +; BTVER2-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [3:1.00] +; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm1, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_cvttpd2pi: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [12:1.00] +; ZNVER1-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: por %mm0, %mm1 # sched: 
[1:0.25] +; ZNVER1-NEXT: movd %mm1, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) + %2 = load <2 x double>, <2 x double> *%a1, align 16 + %3 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %2) + %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone + +define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { +; GENERIC-LABEL: test_cvttps2pi: +; GENERIC: # BB#0: +; GENERIC-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] +; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm1, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_cvttps2pi: +; ATOM: # BB#0: +; ATOM-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:5.00] +; ATOM-NEXT: cvttps2pi (%rdi), %mm1 # sched: [5:5.00] +; ATOM-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; ATOM-NEXT: movd %mm1, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_cvttps2pi: +; SLM: # BB#0: +; SLM-NEXT: cvttps2pi (%rdi), %mm1 # sched: [7:1.00] +; SLM-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:0.50] +; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SLM-NEXT: movd %mm1, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_cvttps2pi: +; SANDY: # BB#0: +; SANDY-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] +; SANDY-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] +; SANDY-NEXT: por %mm0, %mm1 # sched: [1:1.00] +; SANDY-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_cvttps2pi: +; HASWELL: # BB#0: +; HASWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] +; HASWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [3:1.00] +; HASWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] +; HASWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] +; 
HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_cvttps2pi: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [3:1.00] +; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_cvttps2pi: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00] +; SKYLAKE-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50] +; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_cvttps2pi: +; SKX: # BB#0: +; SKX-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00] +; SKX-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50] +; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; SKX-NEXT: movd %mm1, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_cvttps2pi: +; BTVER2: # BB#0: +; BTVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] +; BTVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] +; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm1, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_cvttps2pi: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: cvttps2pi (%rdi), %mm1 # sched: [12:1.00] +; ZNVER1-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm1, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) + %2 = load <4 x float>, <4 x float> *%a1, align 16 + %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2) + %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone + +define void @test_emms() optsize { +; GENERIC-LABEL: test_emms: 
+; GENERIC: # BB#0: +; GENERIC-NEXT: emms +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_emms: +; ATOM: # BB#0: +; ATOM-NEXT: emms # sched: [5:2.50] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_emms: +; SLM: # BB#0: +; SLM-NEXT: emms +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_emms: +; SANDY: # BB#0: +; SANDY-NEXT: emms +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_emms: +; HASWELL: # BB#0: +; HASWELL-NEXT: emms # sched: [31:10.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_emms: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: emms # sched: [31:10.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_emms: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: emms # sched: [10:4.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_emms: +; SKX: # BB#0: +; SKX-NEXT: emms # sched: [10:4.50] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_emms: +; BTVER2: # BB#0: +; BTVER2-NEXT: emms +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_emms: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: emms +; ZNVER1-NEXT: retq # sched: [1:0.50] + call void @llvm.x86.mmx.emms() + ret void +} +declare void @llvm.x86.mmx.emms() + +define void @test_maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) optsize { +; GENERIC-LABEL: test_maskmovq: +; GENERIC: # BB#0: +; GENERIC-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_maskmovq: +; ATOM: # BB#0: +; ATOM-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_maskmovq: +; SLM: # BB#0: +; SLM-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_maskmovq: +; SANDY: # BB#0: +; SANDY-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_maskmovq: +; HASWELL: # BB#0: +; HASWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; 
HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_maskmovq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_maskmovq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_maskmovq: +; SKX: # BB#0: +; SKX-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_maskmovq: +; BTVER2: # BB#0: +; BTVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_maskmovq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: maskmovq %mm1, %mm0 # sched: [100:?] +; ZNVER1-NEXT: retq # sched: [1:0.50] + call void @llvm.x86.mmx.maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) + ret void +} +declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind + +define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { +; GENERIC-LABEL: test_movd: +; GENERIC: # BB#0: +; GENERIC-NEXT: movd %edi, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; GENERIC-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [4:0.50] +; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; GENERIC-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; GENERIC-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [7:1.00] +; GENERIC-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: movd %mm1, %ecx # sched: [1:0.33] +; GENERIC-NEXT: movd %mm0, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_movd: +; ATOM: # BB#0: +; ATOM-NEXT: movd %edi, %xmm0 # sched: [1:1.00] +; ATOM-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00] +; ATOM-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [1:1.00] +; ATOM-NEXT: movlps %xmm0, 
-{{[0-9]+}}(%rsp) # sched: [1:1.00] +; ATOM-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [1:1.00] +; ATOM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm1, %ecx # sched: [3:3.00] +; ATOM-NEXT: movd %mm0, %eax # sched: [3:3.00] +; ATOM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_movd: +; SLM: # BB#0: +; SLM-NEXT: movd %edi, %xmm0 # sched: [1:0.50] +; SLM-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00] +; SLM-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [3:1.00] +; SLM-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SLM-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [4:1.00] +; SLM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: movd %mm1, %ecx # sched: [1:0.50] +; SLM-NEXT: movd %mm0, %eax # sched: [1:0.50] +; SLM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_movd: +; SANDY: # BB#0: +; SANDY-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [7:1.00] +; SANDY-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: movd %mm1, %ecx # sched: [1:0.33] +; SANDY-NEXT: movd %mm0, %eax # sched: [1:0.33] +; SANDY-NEXT: movl %ecx, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_movd: +; HASWELL: # BB#0: +; HASWELL-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; HASWELL-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] +; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; HASWELL-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) 
# sched: [1:1.00] +; HASWELL-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] +; HASWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm1, %ecx # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %eax # sched: [1:1.00] +; HASWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_movd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] +; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm1, %ecx # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %eax # sched: [1:1.00] +; BROADWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_movd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] +; SKYLAKE-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SKYLAKE-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [5:0.50] +; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SKYLAKE-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [6:0.50] +; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm1, %ecx # sched: [2:1.00] +; SKYLAKE-NEXT: movd %mm0, %eax # sched: [2:1.00] +; SKYLAKE-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_movd: +; SKX: # BB#0: +; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] +; SKX-NEXT: vpmovqd %xmm0, -{{[0-9]+}}(%rsp) # sched: [4:1.00] +; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-NEXT: movq 
-{{[0-9]+}}(%rsp), %mm1 # sched: [5:0.50] +; SKX-NEXT: vpmovqd %xmm0, -{{[0-9]+}}(%rsp) # sched: [4:1.00] +; SKX-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [6:0.50] +; SKX-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: movd %mm1, %ecx # sched: [2:1.00] +; SKX-NEXT: movd %mm0, %eax # sched: [2:1.00] +; SKX-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_movd: +; BTVER2: # BB#0: +; BTVER2-NEXT: vmovd %edi, %xmm0 # sched: [1:0.17] +; BTVER2-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [5:1.00] +; BTVER2-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BTVER2-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [6:1.00] +; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm1, %ecx # sched: [1:0.17] +; BTVER2-NEXT: movd %mm0, %eax # sched: [1:0.17] +; BTVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_movd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: vmovd %edi, %xmm0 # sched: [3:1.00] +; ZNVER1-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:0.50] +; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] +; ZNVER1-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [8:0.50] +; ZNVER1-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:0.50] +; ZNVER1-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [8:0.50] +; ZNVER1-NEXT: paddd %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm1, %ecx # sched: [2:1.00] +; ZNVER1-NEXT: movd %mm0, %eax # sched: [2:1.00] +; ZNVER1-NEXT: movl %ecx, (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = insertelement <2 x i32> undef, i32 %a1, i32 0 + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = load i32, i32 *%a2 + %4 = insertelement <2 x i32> undef, i32 %3, i32 0 + %5 = bitcast <2 x i32> %4 to x86_mmx + %6 = call x86_mmx 
@llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %5) + %7 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %6) + %8 = bitcast x86_mmx %6 to <2 x i32> + %9 = bitcast x86_mmx %7 to <2 x i32> + %10 = extractelement <2 x i32> %8, i32 0 + %11 = extractelement <2 x i32> %9, i32 0 + store i32 %10, i32* %a2 + ret i32 %11 +} + +define i64 @test_movdq2q(<2 x i64> %a0) optsize { +; GENERIC-LABEL: test_movdq2q: +; GENERIC: # BB#0: +; GENERIC-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] +; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_movdq2q: +; ATOM: # BB#0: +; ATOM-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] +; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_movdq2q: +; SLM: # BB#0: +; SLM-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_movdq2q: +; SANDY: # BB#0: +; SANDY-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] +; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_movdq2q: +; HASWELL: # BB#0: +; HASWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67] +; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_movdq2q: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67] +; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_movdq2q: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] +; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: 
[1:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_movdq2q: +; SKX: # BB#0: +; SKX-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] +; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_movdq2q: +; BTVER2: # BB#0: +; BTVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.17] +; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_movdq2q: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = extractelement <2 x i64> %a0, i32 0 + %2 = bitcast i64 %1 to x86_mmx + %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} + +define void @test_movntq(x86_mmx* %a0, x86_mmx %a1) optsize { +; GENERIC-LABEL: test_movntq: +; GENERIC: # BB#0: +; GENERIC-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_movntq: +; ATOM: # BB#0: +; ATOM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_movntq: +; SLM: # BB#0: +; SLM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_movntq: +; SANDY: # BB#0: +; SANDY-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_movntq: +; HASWELL: # BB#0: +; HASWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_movntq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_movntq: +; SKYLAKE: # BB#0: +; 
SKYLAKE-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_movntq: +; SKX: # BB#0: +; SKX-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_movntq: +; BTVER2: # BB#0: +; BTVER2-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_movntq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: movntq %mm0, (%rdi) # sched: [1:0.50] +; ZNVER1-NEXT: retq # sched: [1:0.50] + call void @llvm.x86.mmx.movnt.dq(x86_mmx* %a0, x86_mmx %a1) + ret void +} +declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind + +define void @test_movq(i64 *%a0) { +; GENERIC-LABEL: test_movq: +; GENERIC: # BB#0: +; GENERIC-NEXT: movq (%rdi), %mm0 # sched: [4:0.50] +; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_movq: +; ATOM: # BB#0: +; ATOM-NEXT: movq (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_movq: +; SLM: # BB#0: +; SLM-NEXT: movq (%rdi), %mm0 # sched: [3:1.00] +; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; SLM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_movq: +; SANDY: # BB#0: +; SANDY-NEXT: movq (%rdi), %mm0 # sched: [4:0.50] +; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] +; SANDY-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_movq: +; HASWELL: # BB#0: +; HASWELL-NEXT: movq (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_movq: +; 
BROADWELL: # BB#0: +; BROADWELL-NEXT: movq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_movq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] +; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_movq: +; SKX: # BB#0: +; SKX-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] +; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; SKX-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_movq: +; BTVER2: # BB#0: +; BTVER2-NEXT: movq (%rdi), %mm0 # sched: [5:1.00] +; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_movq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: movq (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movq %mm0, (%rdi) # sched: [1:0.50] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = load i64, i64* %a0, align 8 + %2 = bitcast i64 %1 to x86_mmx + %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + store i64 %4, i64* %a0, align 8 + ret void +} + +define <2 x i64> @test_movq2dq(x86_mmx %a0) optsize { +; GENERIC-LABEL: test_movq2dq: +; GENERIC: # BB#0: +; GENERIC-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_movq2dq: +; ATOM: # BB#0: +; ATOM-NEXT: movq2dq %mm0, %xmm0 +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_movq2dq: +; SLM: # BB#0: +; SLM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_movq2dq: +; SANDY: # BB#0: +; SANDY-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: 
[1:1.00] +; +; HASWELL-LABEL: test_movq2dq: +; HASWELL: # BB#0: +; HASWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_movq2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_movq2dq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_movq2dq: +; SKX: # BB#0: +; SKX-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_movq2dq: +; BTVER2: # BB#0: +; BTVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_movq2dq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.25] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = bitcast x86_mmx %a0 to i64 + %2 = insertelement <2 x i64> undef, i64 %1, i32 0 + ret <2 x i64> %2 +} + +define i64 @test_pabsb(x86_mmx *%a0) optsize { +; GENERIC-LABEL: test_pabsb: +; GENERIC: # BB#0: +; GENERIC-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] +; GENERIC-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pabsb: +; ATOM: # BB#0: +; ATOM-NEXT: pabsb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pabsb: +; SLM: # BB#0: +; SLM-NEXT: pabsb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pabsb: +; SANDY: # BB#0: +; SANDY-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] +; SANDY-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; 
+; HASWELL-LABEL: test_pabsb: +; HASWELL: # BB#0: +; HASWELL-NEXT: pabsb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pabsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pabsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pabsb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pabsb: +; SKX: # BB#0: +; SKX-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pabsb: +; BTVER2: # BB#0: +; BTVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pabsb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pabsb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: pabsb %mm0, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone + +define i64 @test_pabsd(x86_mmx *%a0) optsize { +; GENERIC-LABEL: test_pabsd: +; GENERIC: # BB#0: +; GENERIC-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] +; GENERIC-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] 
+; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pabsd: +; ATOM: # BB#0: +; ATOM-NEXT: pabsd (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pabsd: +; SLM: # BB#0: +; SLM-NEXT: pabsd (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pabsd: +; SANDY: # BB#0: +; SANDY-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] +; SANDY-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pabsd: +; HASWELL: # BB#0: +; HASWELL-NEXT: pabsd (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pabsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pabsd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pabsd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pabsd: +; SKX: # BB#0: +; SKX-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pabsd: +; BTVER2: # BB#0: +; BTVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: 
test_pabsd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pabsd (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: pabsd %mm0, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone + +define i64 @test_pabsw(x86_mmx *%a0) optsize { +; GENERIC-LABEL: test_pabsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] +; GENERIC-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pabsw: +; ATOM: # BB#0: +; ATOM-NEXT: pabsw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pabsw: +; SLM: # BB#0: +; SLM-NEXT: pabsw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pabsw: +; SANDY: # BB#0: +; SANDY-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] +; SANDY-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pabsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pabsw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pabsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pabsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] 
+; +; SKYLAKE-LABEL: test_pabsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pabsw: +; SKX: # BB#0: +; SKX-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pabsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pabsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pabsw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: pabsw %mm0, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) + %3 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone + +define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_packssdw: +; GENERIC: # BB#0: +; GENERIC-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: packssdw (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_packssdw: +; ATOM: # BB#0: +; ATOM-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: packssdw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_packssdw: +; SLM: # BB#0: +; SLM-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: packssdw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # 
sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_packssdw: +; SANDY: # BB#0: +; SANDY-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: packssdw (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_packssdw: +; HASWELL: # BB#0: +; HASWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: packssdw (%rdi), %mm0 # sched: [2:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_packssdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: packssdw (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_packssdw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_packssdw: +; SKX: # BB#0: +; SKX-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_packssdw: +; BTVER2: # BB#0: +; BTVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_packssdw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] +; ZNVER1-NEXT: packssdw (%rdi), %mm0 # sched: [1:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %1, 
x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_packsswb: +; GENERIC: # BB#0: +; GENERIC-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: packsswb (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_packsswb: +; ATOM: # BB#0: +; ATOM-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: packsswb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_packsswb: +; SLM: # BB#0: +; SLM-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: packsswb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_packsswb: +; SANDY: # BB#0: +; SANDY-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: packsswb (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_packsswb: +; HASWELL: # BB#0: +; HASWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: packsswb (%rdi), %mm0 # sched: [2:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_packsswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: packsswb (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_packsswb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; 
SKX-LABEL: test_packsswb: +; SKX: # BB#0: +; SKX-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_packsswb: +; BTVER2: # BB#0: +; BTVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_packsswb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] +; ZNVER1-NEXT: packsswb (%rdi), %mm0 # sched: [1:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_packuswb: +; GENERIC: # BB#0: +; GENERIC-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: packuswb (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_packuswb: +; ATOM: # BB#0: +; ATOM-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: packuswb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_packuswb: +; SLM: # BB#0: +; SLM-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: packuswb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_packuswb: +; SANDY: # BB#0: +; SANDY-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: packuswb (%rdi), %mm0 # 
sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_packuswb: +; HASWELL: # BB#0: +; HASWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: packuswb (%rdi), %mm0 # sched: [2:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_packuswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: packuswb (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_packuswb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_packuswb: +; SKX: # BB#0: +; SKX-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_packuswb: +; BTVER2: # BB#0: +; BTVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_packuswb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] +; ZNVER1-NEXT: packuswb (%rdi), %mm0 # sched: [1:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* 
%a2) optsize { +; GENERIC-LABEL: test_paddb: +; GENERIC: # BB#0: +; GENERIC-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: paddb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_paddb: +; ATOM: # BB#0: +; ATOM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: paddb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_paddb: +; SLM: # BB#0: +; SLM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_paddb: +; SANDY: # BB#0: +; SANDY-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: paddb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_paddb: +; HASWELL: # BB#0: +; HASWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_paddb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_paddb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_paddb: +; SKX: # BB#0: +; SKX-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_paddb: +; BTVER2: # BB#0: 
+; BTVER2-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: paddb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_paddb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: paddb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_paddd: +; GENERIC: # BB#0: +; GENERIC-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: paddd (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_paddd: +; ATOM: # BB#0: +; ATOM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: paddd (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_paddd: +; SLM: # BB#0: +; SLM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddd (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_paddd: +; SANDY: # BB#0: +; SANDY-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: paddd (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_paddd: +; HASWELL: # BB#0: +; HASWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddd (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: 
[2:1.00] +; +; BROADWELL-LABEL: test_paddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_paddd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_paddd: +; SKX: # BB#0: +; SKX-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_paddd: +; BTVER2: # BB#0: +; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: paddd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_paddd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: paddd %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddd (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_paddq: +; GENERIC: # BB#0: +; GENERIC-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_paddq: +; ATOM: # BB#0: +; ATOM-NEXT: paddq %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: paddq (%rdi), %mm0 # sched: 
[3:1.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_paddq: +; SLM: # BB#0: +; SLM-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddq (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_paddq: +; SANDY: # BB#0: +; SANDY-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; SANDY-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_paddq: +; HASWELL: # BB#0: +; HASWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddq (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_paddq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_paddq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_paddq: +; SKX: # BB#0: +; SKX-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_paddq: +; BTVER2: # BB#0: +; BTVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: paddq (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_paddq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: paddq %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddq (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; 
ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_paddsb: +; GENERIC: # BB#0: +; GENERIC-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: paddsb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_paddsb: +; ATOM: # BB#0: +; ATOM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: paddsb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_paddsb: +; SLM: # BB#0: +; SLM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddsb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_paddsb: +; SANDY: # BB#0: +; SANDY-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: paddsb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_paddsb: +; HASWELL: # BB#0: +; HASWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddsb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_paddsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_paddsb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] +; 
SKYLAKE-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_paddsb: +; SKX: # BB#0: +; SKX-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_paddsb: +; BTVER2: # BB#0: +; BTVER2-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_paddsb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: paddsb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddsb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_paddsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: paddsw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_paddsw: +; ATOM: # BB#0: +; ATOM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: paddsw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_paddsw: +; SLM: # BB#0: +; SLM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddsw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_paddsw: +; 
SANDY: # BB#0: +; SANDY-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: paddsw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_paddsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddsw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_paddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_paddsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_paddsw: +; SKX: # BB#0: +; SKX-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_paddsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_paddsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: paddsw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddsw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind 
readnone + +define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_paddusb: +; GENERIC: # BB#0: +; GENERIC-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: paddusb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_paddusb: +; ATOM: # BB#0: +; ATOM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: paddusb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_paddusb: +; SLM: # BB#0: +; SLM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddusb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_paddusb: +; SANDY: # BB#0: +; SANDY-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: paddusb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_paddusb: +; HASWELL: # BB#0: +; HASWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddusb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_paddusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddusb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_paddusb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_paddusb: +; SKX: # BB#0: +; SKX-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: 
movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_paddusb: +; BTVER2: # BB#0: +; BTVER2-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_paddusb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: paddusb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddusb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_paddusw: +; GENERIC: # BB#0: +; GENERIC-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: paddusw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_paddusw: +; ATOM: # BB#0: +; ATOM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: paddusw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_paddusw: +; SLM: # BB#0: +; SLM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddusw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_paddusw: +; SANDY: # BB#0: +; SANDY-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: paddusw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_paddusw: +; HASWELL: # BB#0: +; HASWELL-NEXT: 
paddusw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddusw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_paddusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddusw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_paddusw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_paddusw: +; SKX: # BB#0: +; SKX-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_paddusw: +; BTVER2: # BB#0: +; BTVER2-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_paddusw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: paddusw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddusw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_paddw: +; GENERIC: # BB#0: +; GENERIC-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: paddw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd 
%mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_paddw: +; ATOM: # BB#0: +; ATOM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: paddw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_paddw: +; SLM: # BB#0: +; SLM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: paddw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_paddw: +; SANDY: # BB#0: +; SANDY-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: paddw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_paddw: +; HASWELL: # BB#0: +; HASWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: paddw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_paddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_paddw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_paddw: +; SKX: # BB#0: +; SKX-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_paddw: +; BTVER2: # BB#0: +; BTVER2-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: paddw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] 
+; +; ZNVER1-LABEL: test_paddw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: paddw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: paddw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_palignr: +; GENERIC: # BB#0: +; GENERIC-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_palignr: +; ATOM: # BB#0: +; ATOM-NEXT: palignr $1, %mm1, %mm0 +; ATOM-NEXT: palignr $1, (%rdi), %mm0 +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_palignr: +; SLM: # BB#0: +; SLM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: palignr $1, (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_palignr: +; SANDY: # BB#0: +; SANDY-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] +; SANDY-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_palignr: +; HASWELL: # BB#0: +; HASWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_palignr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: palignr $1, 
(%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_palignr: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_palignr: +; SKX: # BB#0: +; SKX-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_palignr: +; BTVER2: # BB#0: +; BTVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_palignr: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: palignr $1, (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a0, x86_mmx %a1, i8 1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %1, x86_mmx %2, i8 1) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone + +define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pand: +; GENERIC: # BB#0: +; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pand: +; ATOM: # BB#0: +; ATOM-NEXT: pand %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pand (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq 
# sched: [79:39.50] +; +; SLM-LABEL: test_pand: +; SLM: # BB#0: +; SLM-NEXT: pand %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pand (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pand: +; SANDY: # BB#0: +; SANDY-NEXT: pand %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: pand (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pand: +; HASWELL: # BB#0: +; HASWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] +; HASWELL-NEXT: pand (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pand: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: pand (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pand: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pand %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pand: +; SKX: # BB#0: +; SKX-NEXT: pand %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pand: +; BTVER2: # BB#0: +; BTVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pand: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pand %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pand (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a0, x86_mmx 
%a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pandn: +; GENERIC: # BB#0: +; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pandn: +; ATOM: # BB#0: +; ATOM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pandn (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pandn: +; SLM: # BB#0: +; SLM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pandn (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pandn: +; SANDY: # BB#0: +; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pandn: +; HASWELL: # BB#0: +; HASWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] +; HASWELL-NEXT: pandn (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pandn: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: pandn (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pandn: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # 
sched: [7:1.00] +; +; SKX-LABEL: test_pandn: +; SKX: # BB#0: +; SKX-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pandn: +; BTVER2: # BB#0: +; BTVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pandn: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pandn %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pandn (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pavgb: +; GENERIC: # BB#0: +; GENERIC-NEXT: pavgb %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pavgb (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pavgb: +; ATOM: # BB#0: +; ATOM-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: pavgb (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pavgb: +; SLM: # BB#0: +; SLM-NEXT: pavgb %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pavgb (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pavgb: +; SANDY: # BB#0: +; SANDY-NEXT: pavgb %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pavgb (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: 
[1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pavgb: +; HASWELL: # BB#0: +; HASWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pavgb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pavgb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pavgb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pavgb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pavgb: +; SKX: # BB#0: +; SKX-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pavgb: +; BTVER2: # BB#0: +; BTVER2-NEXT: pavgb %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pavgb (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pavgb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pavgb %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pavgb (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pavgw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pavgw %mm1, %mm0 # sched: [5:1.00] 
+; GENERIC-NEXT: pavgw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pavgw: +; ATOM: # BB#0: +; ATOM-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: pavgw (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pavgw: +; SLM: # BB#0: +; SLM-NEXT: pavgw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pavgw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pavgw: +; SANDY: # BB#0: +; SANDY-NEXT: pavgw %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pavgw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pavgw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pavgw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pavgw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pavgw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pavgw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pavgw: +; SKX: # BB#0: +; SKX-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pavgw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pavgw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pavgw (%rdi), %mm0 # sched: [7:1.00] +; 
BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pavgw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pavgw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pavgw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pcmpeqb: +; GENERIC: # BB#0: +; GENERIC-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pcmpeqb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pcmpeqb: +; ATOM: # BB#0: +; ATOM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pcmpeqb: +; SLM: # BB#0: +; SLM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pcmpeqb: +; SANDY: # BB#0: +; SANDY-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pcmpeqb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pcmpeqb: +; HASWELL: # BB#0: +; HASWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pcmpeqb: +; BROADWELL: # BB#0: +; 
BROADWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pcmpeqb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pcmpeqb: +; SKX: # BB#0: +; SKX-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pcmpeqb: +; BTVER2: # BB#0: +; BTVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pcmpeqb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pcmpeqd: +; GENERIC: # BB#0: +; GENERIC-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pcmpeqd (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pcmpeqd: +; ATOM: # BB#0: +; ATOM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: 
movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pcmpeqd: +; SLM: # BB#0: +; SLM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pcmpeqd: +; SANDY: # BB#0: +; SANDY-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pcmpeqd (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pcmpeqd: +; HASWELL: # BB#0: +; HASWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pcmpeqd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pcmpeqd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pcmpeqd: +; SKX: # BB#0: +; SKX-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pcmpeqd: +; BTVER2: # BB#0: +; BTVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pcmpeqd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, 
%rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pcmpeqw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pcmpeqw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pcmpeqw: +; ATOM: # BB#0: +; ATOM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pcmpeqw: +; SLM: # BB#0: +; SLM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pcmpeqw: +; SANDY: # BB#0: +; SANDY-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pcmpeqw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pcmpeqw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pcmpeqw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pcmpeqw: +; SKYLAKE: # BB#0: +; 
SKYLAKE-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pcmpeqw: +; SKX: # BB#0: +; SKX-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pcmpeqw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pcmpeqw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pcmpgtb: +; GENERIC: # BB#0: +; GENERIC-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pcmpgtb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pcmpgtb: +; ATOM: # BB#0: +; ATOM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pcmpgtb: +; SLM: # BB#0: +; SLM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: 
[1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pcmpgtb: +; SANDY: # BB#0: +; SANDY-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pcmpgtb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pcmpgtb: +; HASWELL: # BB#0: +; HASWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pcmpgtb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pcmpgtb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pcmpgtb: +; SKX: # BB#0: +; SKX-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pcmpgtb: +; BTVER2: # BB#0: +; BTVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pcmpgtb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast 
x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pcmpgtd: +; GENERIC: # BB#0: +; GENERIC-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pcmpgtd (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pcmpgtd: +; ATOM: # BB#0: +; ATOM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pcmpgtd: +; SLM: # BB#0: +; SLM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pcmpgtd: +; SANDY: # BB#0: +; SANDY-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pcmpgtd (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pcmpgtd: +; HASWELL: # BB#0: +; HASWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pcmpgtd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pcmpgtd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pcmpgtd: +; SKX: # BB#0: +; SKX-NEXT: 
pcmpgtd %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pcmpgtd: +; BTVER2: # BB#0: +; BTVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pcmpgtd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pcmpgtw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pcmpgtw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pcmpgtw: +; ATOM: # BB#0: +; ATOM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pcmpgtw: +; SLM: # BB#0: +; SLM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pcmpgtw: +; SANDY: # BB#0: +; SANDY-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pcmpgtw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: 
retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pcmpgtw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pcmpgtw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pcmpgtw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pcmpgtw: +; SKX: # BB#0: +; SKX-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pcmpgtw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pcmpgtw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone + +define i32 @test_pextrw(x86_mmx %a0) optsize { +; GENERIC-LABEL: test_pextrw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00] +; 
GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pextrw: +; ATOM: # BB#0: +; ATOM-NEXT: pextrw $0, %mm0, %eax # sched: [4:2.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pextrw: +; SLM: # BB#0: +; SLM-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pextrw: +; SANDY: # BB#0: +; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pextrw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pextrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pextrw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pextrw: +; SKX: # BB#0: +; SKX-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pextrw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pextrw $0, %mm0, %eax # sched: [1:0.50] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pextrw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pextrw $0, %mm0, %eax # sched: [2:2.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call i32 @llvm.x86.mmx.pextr.w(x86_mmx %a0, i32 0) + ret i32 %1 +} +declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32) nounwind readnone + +define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_phaddd: +; GENERIC: # BB#0: +; GENERIC-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] +; GENERIC-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_phaddd: +; ATOM: # BB#0: +; ATOM-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] +; ATOM-NEXT: phaddd (%rdi), %mm0 # sched: [4:2.00] +; ATOM-NEXT: movd %mm0, %rax # 
sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_phaddd: +; SLM: # BB#0: +; SLM-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: phaddd (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_phaddd: +; SANDY: # BB#0: +; SANDY-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] +; SANDY-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_phaddd: +; HASWELL: # BB#0: +; HASWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: phaddd (%rdi), %mm0 # sched: [3:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_phaddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddd (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_phaddd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_phaddd: +; SKX: # BB#0: +; SKX-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_phaddd: +; BTVER2: # BB#0: +; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_phaddd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: phaddd %mm1, %mm0 # sched: [100:?] +; ZNVER1-NEXT: phaddd (%rdi), %mm0 # sched: [100:?] 
+; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_phaddsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] +; GENERIC-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_phaddsw: +; ATOM: # BB#0: +; ATOM-NEXT: phaddsw %mm1, %mm0 # sched: [5:2.50] +; ATOM-NEXT: phaddsw (%rdi), %mm0 # sched: [6:3.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_phaddsw: +; SLM: # BB#0: +; SLM-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: phaddsw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_phaddsw: +; SANDY: # BB#0: +; SANDY-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] +; SANDY-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_phaddsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [3:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_phaddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: 
test_phaddsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_phaddsw: +; SKX: # BB#0: +; SKX-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_phaddsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_phaddsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: phaddsw %mm1, %mm0 # sched: [100:?] +; ZNVER1-NEXT: phaddsw (%rdi), %mm0 # sched: [100:?] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_phaddw: +; GENERIC: # BB#0: +; GENERIC-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] +; GENERIC-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_phaddw: +; ATOM: # BB#0: +; ATOM-NEXT: phaddw %mm1, %mm0 # sched: [5:2.50] +; ATOM-NEXT: phaddw (%rdi), %mm0 # sched: [6:3.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_phaddw: +; SLM: # BB#0: +; SLM-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: phaddw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: 
movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_phaddw: +; SANDY: # BB#0: +; SANDY-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] +; SANDY-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_phaddw: +; HASWELL: # BB#0: +; HASWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: phaddw (%rdi), %mm0 # sched: [3:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_phaddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_phaddw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_phaddw: +; SKX: # BB#0: +; SKX-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_phaddw: +; BTVER2: # BB#0: +; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_phaddw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: phaddw %mm1, %mm0 # sched: [100:?] +; ZNVER1-NEXT: phaddw (%rdi), %mm0 # sched: [100:?] 
+; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_phsubd: +; GENERIC: # BB#0: +; GENERIC-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] +; GENERIC-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_phsubd: +; ATOM: # BB#0: +; ATOM-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] +; ATOM-NEXT: phsubd (%rdi), %mm0 # sched: [4:2.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_phsubd: +; SLM: # BB#0: +; SLM-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: phsubd (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_phsubd: +; SANDY: # BB#0: +; SANDY-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] +; SANDY-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_phsubd: +; HASWELL: # BB#0: +; HASWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: phsubd (%rdi), %mm0 # sched: [3:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_phsubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubd (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_phsubd: +; SKYLAKE: # 
BB#0: +; SKYLAKE-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_phsubd: +; SKX: # BB#0: +; SKX-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_phsubd: +; BTVER2: # BB#0: +; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_phsubd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: phsubd %mm1, %mm0 # sched: [100:?] +; ZNVER1-NEXT: phsubd (%rdi), %mm0 # sched: [100:?] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_phsubsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] +; GENERIC-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_phsubsw: +; ATOM: # BB#0: +; ATOM-NEXT: phsubsw %mm1, %mm0 # sched: [5:2.50] +; ATOM-NEXT: phsubsw (%rdi), %mm0 # sched: [6:3.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_phsubsw: +; SLM: # BB#0: +; SLM-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: phsubsw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] 
+; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_phsubsw: +; SANDY: # BB#0: +; SANDY-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] +; SANDY-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_phsubsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [3:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_phsubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_phsubsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_phsubsw: +; SKX: # BB#0: +; SKX-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_phsubsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_phsubsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: phsubsw %mm1, %mm0 # sched: [100:?] 
+; ZNVER1-NEXT: phsubsw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_phsubw: +; GENERIC: # BB#0: +; GENERIC-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] +; GENERIC-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_phsubw: +; ATOM: # BB#0: +; ATOM-NEXT: phsubw %mm1, %mm0 # sched: [5:2.50] +; ATOM-NEXT: phsubw (%rdi), %mm0 # sched: [6:3.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_phsubw: +; SLM: # BB#0: +; SLM-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: phsubw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_phsubw: +; SANDY: # BB#0: +; SANDY-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] +; SANDY-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_phsubw: +; HASWELL: # BB#0: +; HASWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] +; HASWELL-NEXT: phsubw (%rdi), %mm0 # sched: [3:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_phsubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: 
[2:1.00] +; +; SKYLAKE-LABEL: test_phsubw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] +; SKYLAKE-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_phsubw: +; SKX: # BB#0: +; SKX-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] +; SKX-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_phsubw: +; BTVER2: # BB#0: +; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_phsubw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: phsubw %mm1, %mm0 # sched: [100:?] +; ZNVER1-NEXT: phsubw (%rdi), %mm0 # sched: [100:?] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { +; GENERIC-LABEL: test_pinsrw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50] +; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pinsrw: +; ATOM: # BB#0: +; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00] +; ATOM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] +; ATOM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pinsrw: +; 
SLM: # BB#0: +; SLM-NEXT: movswl (%rsi), %eax # sched: [4:1.00] +; SLM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] +; SLM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pinsrw: +; SANDY: # BB#0: +; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50] +; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pinsrw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] +; HASWELL-NEXT: movswl (%rsi), %eax # sched: [4:0.50] +; HASWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pinsrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movswl (%rsi), %eax # sched: [4:0.50] +; BROADWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pinsrw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] +; SKYLAKE-NEXT: movswl (%rsi), %eax # sched: [5:0.50] +; SKYLAKE-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pinsrw: +; SKX: # BB#0: +; SKX-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] +; SKX-NEXT: movswl (%rsi), %eax # sched: [5:0.50] +; SKX-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pinsrw: +; BTVER2: # BB#0: +; BTVER2-NEXT: movswl (%rsi), %eax # sched: [4:1.00] +; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pinsrw $1, %eax, %mm0 # 
sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pinsrw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: movswl (%rsi), %eax # sched: [8:0.50] +; ZNVER1-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %a0, i32 %a1, i32 0) + %2 = load i16, i16 *%a2, align 2 + %3 = sext i16 %2 to i32 + %4 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %1, i32 %3, i32 1) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32) nounwind readnone + +define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmaddwd: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmaddwd: +; ATOM: # BB#0: +; ATOM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:4.00] +; ATOM-NEXT: pmaddwd (%rdi), %mm0 # sched: [4:4.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmaddwd: +; SLM: # BB#0: +; SLM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmaddwd: +; SANDY: # BB#0: +; SANDY-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmaddwd: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] +; HASWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [5:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: 
[1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmaddwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmaddwd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] +; SKYLAKE-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmaddwd: +; SKX: # BB#0: +; SKX-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] +; SKX-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmaddwd: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmaddwd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmaddwd (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmaddubsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmaddubsw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pmaddubsw (%rdi), %mm0 # sched: [8:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmaddubsw: +; ATOM: # BB#0: 
+; ATOM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:4.00] +; ATOM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [4:4.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmaddubsw: +; SLM: # BB#0: +; SLM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmaddubsw: +; SANDY: # BB#0: +; SANDY-NEXT: pmaddubsw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pmaddubsw (%rdi), %mm0 # sched: [8:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmaddubsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] +; HASWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [5:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmaddubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmaddubsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] +; SKYLAKE-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmaddubsw: +; SKX: # BB#0: +; SKX-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] +; SKX-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmaddubsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: 
test_pmaddubsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmaddubsw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmaxsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmaxsw %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pmaxsw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmaxsw: +; ATOM: # BB#0: +; ATOM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmaxsw: +; SLM: # BB#0: +; SLM-NEXT: pmaxsw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmaxsw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmaxsw: +; SANDY: # BB#0: +; SANDY-NEXT: pmaxsw %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pmaxsw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmaxsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmaxsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pmaxsw (%rdi), %mm0 # 
sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmaxsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmaxsw: +; SKX: # BB#0: +; SKX-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmaxsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmaxsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmaxsw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmaxsw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmaxub: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmaxub %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pmaxub (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmaxub: +; ATOM: # BB#0: +; ATOM-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: pmaxub (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmaxub: +; SLM: # BB#0: 
+; SLM-NEXT: pmaxub %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmaxub (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmaxub: +; SANDY: # BB#0: +; SANDY-NEXT: pmaxub %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pmaxub (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmaxub: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmaxub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmaxub: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmaxub: +; SKX: # BB#0: +; SKX-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmaxub: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmaxub %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmaxub: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmaxub %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmaxub (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a0, x86_mmx %a1) + %2 = load 
x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pminsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pminsw %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pminsw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pminsw: +; ATOM: # BB#0: +; ATOM-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: pminsw (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pminsw: +; SLM: # BB#0: +; SLM-NEXT: pminsw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pminsw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pminsw: +; SANDY: # BB#0: +; SANDY-NEXT: pminsw %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pminsw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pminsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pminsw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pminsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pminsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pminsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; 
SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pminsw: +; SKX: # BB#0: +; SKX-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pminsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pminsw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pminsw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pminsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pminsw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pminsw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pminub: +; GENERIC: # BB#0: +; GENERIC-NEXT: pminub %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pminub (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pminub: +; ATOM: # BB#0: +; ATOM-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: pminub (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pminub: +; SLM: # BB#0: +; SLM-NEXT: pminub %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pminub (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pminub: +; SANDY: # BB#0: +; SANDY-NEXT: pminub %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pminub (%rdi), %mm0 # sched: 
[9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pminub: +; HASWELL: # BB#0: +; HASWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: pminub (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pminub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pminub (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pminub: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pminub: +; SKX: # BB#0: +; SKX-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pminub: +; BTVER2: # BB#0: +; BTVER2-NEXT: pminub %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pminub (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pminub: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pminub %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pminub (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone + +define i32 @test_pmovmskb(x86_mmx %a0) optsize { +; GENERIC-LABEL: test_pmovmskb: +; GENERIC: # BB#0: +; 
GENERIC-NEXT: pmovmskb %mm0, %eax +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmovmskb: +; ATOM: # BB#0: +; ATOM-NEXT: pmovmskb %mm0, %eax +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmovmskb: +; SLM: # BB#0: +; SLM-NEXT: pmovmskb %mm0, %eax +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmovmskb: +; SANDY: # BB#0: +; SANDY-NEXT: pmovmskb %mm0, %eax +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmovmskb: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmovmskb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmovmskb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmovmskb: +; SKX: # BB#0: +; SKX-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmovmskb: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmovmskb %mm0, %eax +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmovmskb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) + ret i32 %1 +} +declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone + +define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmulhrsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmulhrsw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pmulhrsw (%rdi), %mm0 # sched: [8:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmulhrsw: +; ATOM: # BB#0: +; ATOM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:4.00] +; ATOM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [4:4.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq 
# sched: [79:39.50] +; +; SLM-LABEL: test_pmulhrsw: +; SLM: # BB#0: +; SLM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmulhrsw: +; SANDY: # BB#0: +; SANDY-NEXT: pmulhrsw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pmulhrsw (%rdi), %mm0 # sched: [8:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmulhrsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] +; HASWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [5:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmulhrsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmulhrsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] +; SKYLAKE-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmulhrsw: +; SKX: # BB#0: +; SKX-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] +; SKX-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmulhrsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmulhrsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmulhrsw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; 
ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmulhw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmulhw: +; ATOM: # BB#0: +; ATOM-NEXT: pmulhw %mm1, %mm0 # sched: [4:4.00] +; ATOM-NEXT: pmulhw (%rdi), %mm0 # sched: [4:4.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmulhw: +; SLM: # BB#0: +; SLM-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmulhw: +; SANDY: # BB#0: +; SANDY-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmulhw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] +; HASWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [5:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmulhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmulhw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmulhw %mm1, %mm0 # 
sched: [4:1.00] +; SKYLAKE-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmulhw: +; SKX: # BB#0: +; SKX-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] +; SKX-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmulhw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmulhw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmulhw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmulhw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmulhuw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmulhuw: +; ATOM: # BB#0: +; ATOM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:4.00] +; ATOM-NEXT: pmulhuw (%rdi), %mm0 # sched: [4:4.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmulhuw: +; SLM: # BB#0: +; SLM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; 
SANDY-LABEL: test_pmulhuw: +; SANDY: # BB#0: +; SANDY-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmulhuw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] +; HASWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [5:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmulhuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmulhuw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] +; SKYLAKE-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmulhuw: +; SKX: # BB#0: +; SKX-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] +; SKX-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmulhuw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmulhuw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmulhuw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx 
@llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmullw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmullw: +; ATOM: # BB#0: +; ATOM-NEXT: pmullw %mm1, %mm0 # sched: [4:4.00] +; ATOM-NEXT: pmullw (%rdi), %mm0 # sched: [4:4.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmullw: +; SLM: # BB#0: +; SLM-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmullw: +; SANDY: # BB#0: +; SANDY-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmullw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] +; HASWELL-NEXT: pmullw (%rdi), %mm0 # sched: [5:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmullw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmullw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmullw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] +; SKYLAKE-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmullw: +; SKX: # BB#0: +; SKX-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] +; SKX-NEXT: pmullw (%rdi), %mm0 # 
sched: [9:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmullw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmullw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmullw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmullw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pmuludq: +; GENERIC: # BB#0: +; GENERIC-NEXT: pmuludq %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pmuludq: +; ATOM: # BB#0: +; ATOM-NEXT: pmuludq %mm1, %mm0 # sched: [4:4.00] +; ATOM-NEXT: pmuludq (%rdi), %mm0 # sched: [4:4.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pmuludq: +; SLM: # BB#0: +; SLM-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pmuludq: +; SANDY: # BB#0: +; SANDY-NEXT: pmuludq %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pmuludq: +; HASWELL: # BB#0: +; 
HASWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] +; HASWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [5:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pmuludq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pmuludq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] +; SKYLAKE-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pmuludq: +; SKX: # BB#0: +; SKX-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] +; SKX-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pmuludq: +; BTVER2: # BB#0: +; BTVER2-NEXT: pmuludq %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pmuludq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: pmuludq (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_por: +; GENERIC: # BB#0: +; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: por (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: 
movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_por: +; ATOM: # BB#0: +; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: por (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_por: +; SLM: # BB#0: +; SLM-NEXT: por %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: por (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_por: +; SANDY: # BB#0: +; SANDY-NEXT: por %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: por (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_por: +; HASWELL: # BB#0: +; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; HASWELL-NEXT: por (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_por: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: por (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_por: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: por %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: por (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_por: +; SKX: # BB#0: +; SKX-NEXT: por %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: por (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_por: +; BTVER2: # BB#0: +; BTVER2-NEXT: por %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: por (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_por: +; ZNVER1: # 
BB#0: +; ZNVER1-NEXT: por %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: por (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psadbw: +; GENERIC: # BB#0: +; GENERIC-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psadbw (%rdi), %mm0 # sched: [9:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psadbw: +; ATOM: # BB#0: +; ATOM-NEXT: psadbw %mm1, %mm0 # sched: [4:2.00] +; ATOM-NEXT: psadbw (%rdi), %mm0 # sched: [4:2.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psadbw: +; SLM: # BB#0: +; SLM-NEXT: psadbw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psadbw: +; SANDY: # BB#0: +; SANDY-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] +; SANDY-NEXT: psadbw (%rdi), %mm0 # sched: [9:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psadbw: +; HASWELL: # BB#0: +; HASWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] +; HASWELL-NEXT: psadbw (%rdi), %mm0 # sched: [5:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: psadbw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psadbw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] +; SKYLAKE-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psadbw: +; SKX: # BB#0: +; SKX-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] +; SKX-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psadbw: +; BTVER2: # BB#0: +; BTVER2-NEXT: psadbw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psadbw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psadbw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: psadbw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { +; GENERIC-LABEL: test_pshufb: +; GENERIC: # BB#0: +; GENERIC-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pshufb: +; ATOM: # BB#0: +; ATOM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pshufb: +; SLM: # BB#0: +; SLM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: pshufb 
(%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pshufb: +; SANDY: # BB#0: +; SANDY-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] +; SANDY-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pshufb: +; HASWELL: # BB#0: +; HASWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pshufb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pshufb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pshufb: +; SKX: # BB#0: +; SKX-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pshufb: +; BTVER2: # BB#0: +; BTVER2-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pshufb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pshufb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pshufb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx 
@llvm.x86.ssse3.pshuf.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pshufw(x86_mmx *%a0) optsize { +; GENERIC-LABEL: test_pshufw: +; GENERIC: # BB#0: +; GENERIC-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [5:1.00] +; GENERIC-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pshufw: +; ATOM: # BB#0: +; ATOM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] +; ATOM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pshufw: +; SLM: # BB#0: +; SLM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [4:1.00] +; SLM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pshufw: +; SANDY: # BB#0: +; SANDY-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [5:1.00] +; SANDY-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pshufw: +; HASWELL: # BB#0: +; HASWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] +; HASWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pshufw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] +; BROADWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: 
test_pshufw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] +; SKYLAKE-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pshufw: +; SKX: # BB#0: +; SKX-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] +; SKX-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pshufw: +; BTVER2: # BB#0: +; BTVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] +; BTVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pshufw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [8:0.50] +; ZNVER1-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = load x86_mmx, x86_mmx *%a0, align 8 + %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0) + %3 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %2, i8 0) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone + +define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psignb: +; GENERIC: # BB#0: +; GENERIC-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psignb: +; ATOM: # BB#0: +; ATOM-NEXT: psignb %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: psignb (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; 
SLM-LABEL: test_psignb: +; SLM: # BB#0: +; SLM-NEXT: psignb %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: psignb (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psignb: +; SANDY: # BB#0: +; SANDY-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] +; SANDY-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psignb: +; HASWELL: # BB#0: +; HASWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psignb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psignb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psignb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psignb: +; SKX: # BB#0: +; SKX-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psignb: +; BTVER2: # BB#0: +; BTVER2-NEXT: psignb %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: psignb (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psignb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psignb %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: psignb (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx 
@llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psignd: +; GENERIC: # BB#0: +; GENERIC-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psignd: +; ATOM: # BB#0: +; ATOM-NEXT: psignd %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: psignd (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psignd: +; SLM: # BB#0: +; SLM-NEXT: psignd %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: psignd (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psignd: +; SANDY: # BB#0: +; SANDY-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] +; SANDY-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psignd: +; HASWELL: # BB#0: +; HASWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psignd (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psignd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psignd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psignd (%rdi), %mm0 # sched: 
[6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psignd: +; SKX: # BB#0: +; SKX-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psignd: +; BTVER2: # BB#0: +; BTVER2-NEXT: psignd %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: psignd (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psignd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psignd %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: psignd (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psignw: +; GENERIC: # BB#0: +; GENERIC-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] +; GENERIC-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psignw: +; ATOM: # BB#0: +; ATOM-NEXT: psignw %mm1, %mm0 # sched: [1:1.00] +; ATOM-NEXT: psignw (%rdi), %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psignw: +; SLM: # BB#0: +; SLM-NEXT: psignw %mm1, %mm0 # sched: [4:1.00] +; SLM-NEXT: psignw (%rdi), %mm0 # sched: [7:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psignw: +; SANDY: # BB#0: +; SANDY-NEXT: psignw 
%mm1, %mm0 # sched: [1:0.50] +; SANDY-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psignw: +; HASWELL: # BB#0: +; HASWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psignw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psignw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psignw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psignw: +; SKX: # BB#0: +; SKX-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psignw: +; BTVER2: # BB#0: +; BTVER2-NEXT: psignw %mm1, %mm0 # sched: [2:1.00] +; BTVER2-NEXT: psignw (%rdi), %mm0 # sched: [7:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psignw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psignw %mm1, %mm0 # sched: [4:1.00] +; ZNVER1-NEXT: psignw (%rdi), %mm0 # sched: [11:1.00] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 
@test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pslld: +; GENERIC: # BB#0: +; GENERIC-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: pslld (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: pslld $7, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pslld: +; ATOM: # BB#0: +; ATOM-NEXT: pslld %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: pslld (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: pslld $7, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pslld: +; SLM: # BB#0: +; SLM-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: pslld (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: pslld $7, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pslld: +; SANDY: # BB#0: +; SANDY-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: pslld (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: pslld $7, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pslld: +; HASWELL: # BB#0: +; HASWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: pslld (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pslld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: pslld (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pslld: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: pslld $7, 
%mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pslld: +; SKX: # BB#0: +; SKX-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: pslld $7, %mm0 # sched: [1:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pslld: +; BTVER2: # BB#0: +; BTVER2-NEXT: pslld %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: pslld $7, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pslld: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pslld %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pslld (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: pslld $7, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2) + %4 = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %3, i32 7) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone +declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone + +define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psllq: +; GENERIC: # BB#0: +; GENERIC-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: psllq (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psllq $7, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psllq: +; ATOM: # BB#0: +; ATOM-NEXT: psllq %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: psllq (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: psllq $7, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax 
# sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psllq: +; SLM: # BB#0: +; SLM-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: psllq (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: psllq $7, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psllq: +; SANDY: # BB#0: +; SANDY-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: psllq (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psllq $7, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psllq: +; HASWELL: # BB#0: +; HASWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psllq (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psllq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllq (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psllq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: psllq $7, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psllq: +; SKX: # BB#0: +; SKX-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: psllq $7, %mm0 # sched: [1:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psllq: +; BTVER2: # BB#0: +; BTVER2-NEXT: psllq %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: psllq $7, %mm0 # sched: 
[1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psllq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psllq %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psllq (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: psllq $7, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %1, x86_mmx %2) + %4 = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %3, i32 7) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone +declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone + +define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psllw: +; GENERIC: # BB#0: +; GENERIC-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: psllw (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psllw $7, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psllw: +; ATOM: # BB#0: +; ATOM-NEXT: psllw %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: psllw (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: psllw $7, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psllw: +; SLM: # BB#0: +; SLM-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: psllw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: psllw $7, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psllw: +; SANDY: # BB#0: +; SANDY-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: psllw (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psllw $7, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; 
SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psllw: +; HASWELL: # BB#0: +; HASWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psllw (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psllw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psllw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: psllw $7, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psllw: +; SKX: # BB#0: +; SKX-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: psllw $7, %mm0 # sched: [1:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psllw: +; BTVER2: # BB#0: +; BTVER2-NEXT: psllw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: psllw $7, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psllw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psllw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psllw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: psllw $7, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %1, x86_mmx %2) + %4 = call 
x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %3, i32 7) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone +declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone + +define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psrad: +; GENERIC: # BB#0: +; GENERIC-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: psrad (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psrad $7, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psrad: +; ATOM: # BB#0: +; ATOM-NEXT: psrad %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: psrad (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: psrad $7, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psrad: +; SLM: # BB#0: +; SLM-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: psrad (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: psrad $7, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psrad: +; SANDY: # BB#0: +; SANDY-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: psrad (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psrad $7, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psrad: +; HASWELL: # BB#0: +; HASWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psrad (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psrad: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrad (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd 
%mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psrad: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: psrad $7, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psrad: +; SKX: # BB#0: +; SKX-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: psrad $7, %mm0 # sched: [1:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psrad: +; BTVER2: # BB#0: +; BTVER2-NEXT: psrad %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: psrad $7, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psrad: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psrad %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psrad (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: psrad $7, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %1, x86_mmx %2) + %4 = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %3, i32 7) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone +declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone + +define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psraw: +; GENERIC: # BB#0: +; GENERIC-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: psraw (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psraw $7, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; 
GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psraw: +; ATOM: # BB#0: +; ATOM-NEXT: psraw %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: psraw (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: psraw $7, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psraw: +; SLM: # BB#0: +; SLM-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: psraw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: psraw $7, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psraw: +; SANDY: # BB#0: +; SANDY-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: psraw (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psraw $7, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psraw: +; HASWELL: # BB#0: +; HASWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psraw (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psraw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psraw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psraw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: psraw $7, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psraw: +; SKX: # BB#0: +; SKX-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: psraw $7, %mm0 # sched: [1:1.00] +; SKX-NEXT: movd 
%mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psraw: +; BTVER2: # BB#0: +; BTVER2-NEXT: psraw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: psraw $7, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psraw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psraw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psraw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: psraw $7, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %1, x86_mmx %2) + %4 = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %3, i32 7) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone +declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone + +define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psrld: +; GENERIC: # BB#0: +; GENERIC-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: psrld (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psrld $7, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psrld: +; ATOM: # BB#0: +; ATOM-NEXT: psrld %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: psrld (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: psrld $7, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psrld: +; SLM: # BB#0: +; SLM-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: psrld (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: psrld $7, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: 
[4:1.00] +; +; SANDY-LABEL: test_psrld: +; SANDY: # BB#0: +; SANDY-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: psrld (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psrld $7, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psrld: +; HASWELL: # BB#0: +; HASWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psrld (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psrld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrld (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psrld: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: psrld $7, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psrld: +; SKX: # BB#0: +; SKX-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: psrld $7, %mm0 # sched: [1:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psrld: +; BTVER2: # BB#0: +; BTVER2-NEXT: psrld %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: psrld $7, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psrld: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psrld %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psrld (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: psrld $7, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd 
%mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %1, x86_mmx %2) + %4 = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %3, i32 7) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone +declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone + +define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psrlq: +; GENERIC: # BB#0: +; GENERIC-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: psrlq (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psrlq $7, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psrlq: +; ATOM: # BB#0: +; ATOM-NEXT: psrlq %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: psrlq (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: psrlq $7, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psrlq: +; SLM: # BB#0: +; SLM-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: psrlq (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: psrlq $7, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psrlq: +; SANDY: # BB#0: +; SANDY-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: psrlq (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psrlq $7, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psrlq: +; HASWELL: # BB#0: +; HASWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psrlq (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # 
sched: [2:1.00] +; +; BROADWELL-LABEL: test_psrlq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlq (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psrlq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: psrlq $7, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psrlq: +; SKX: # BB#0: +; SKX-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: psrlq $7, %mm0 # sched: [1:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psrlq: +; BTVER2: # BB#0: +; BTVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: psrlq $7, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psrlq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psrlq %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psrlq (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: psrlq $7, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %1, x86_mmx %2) + %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %3, i32 7) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone +declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone + +define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; 
GENERIC-LABEL: test_psrlw: +; GENERIC: # BB#0: +; GENERIC-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: psrlw (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: psrlw $7, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psrlw: +; ATOM: # BB#0: +; ATOM-NEXT: psrlw %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: psrlw (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: psrlw $7, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psrlw: +; SLM: # BB#0: +; SLM-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] +; SLM-NEXT: psrlw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: psrlw $7, %mm0 # sched: [1:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psrlw: +; SANDY: # BB#0: +; SANDY-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: psrlw (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: psrlw $7, %mm0 # sched: [1:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psrlw: +; HASWELL: # BB#0: +; HASWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psrlw (%rdi), %mm0 # sched: [1:1.00] +; HASWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psrlw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psrlw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: psrlw $7, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: 
[1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psrlw: +; SKX: # BB#0: +; SKX-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: psrlw $7, %mm0 # sched: [1:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psrlw: +; BTVER2: # BB#0: +; BTVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: psrlw $7, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psrlw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psrlw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psrlw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: psrlw $7, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %1, x86_mmx %2) + %4 = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %3, i32 7) + %5 = bitcast x86_mmx %4 to i64 + ret i64 %5 +} +declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone +declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone + +define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psubb: +; GENERIC: # BB#0: +; GENERIC-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: psubb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psubb: +; ATOM: # BB#0: +; ATOM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: psubb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psubb: +; SLM: # BB#0: +; SLM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: 
psubb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psubb: +; SANDY: # BB#0: +; SANDY-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: psubb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psubb: +; HASWELL: # BB#0: +; HASWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psubb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psubb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psubb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psubb: +; SKX: # BB#0: +; SKX-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psubb: +; BTVER2: # BB#0: +; BTVER2-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psubb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psubb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psubb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psubb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %1, 
x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psubd: +; GENERIC: # BB#0: +; GENERIC-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: psubd (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psubd: +; ATOM: # BB#0: +; ATOM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: psubd (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psubd: +; SLM: # BB#0: +; SLM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: psubd (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psubd: +; SANDY: # BB#0: +; SANDY-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: psubd (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psubd: +; HASWELL: # BB#0: +; HASWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psubd (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psubd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psubd: +; SKX: # BB#0: +; SKX-NEXT: psubd %mm1, %mm0 # 
sched: [1:0.50] +; SKX-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psubd: +; BTVER2: # BB#0: +; BTVER2-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psubd (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psubd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psubd %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psubd (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psubq: +; GENERIC: # BB#0: +; GENERIC-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: psubq (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psubq: +; ATOM: # BB#0: +; ATOM-NEXT: psubq %mm1, %mm0 # sched: [2:1.00] +; ATOM-NEXT: psubq (%rdi), %mm0 # sched: [3:1.50] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psubq: +; SLM: # BB#0: +; SLM-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: psubq (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psubq: +; SANDY: # BB#0: +; SANDY-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: psubq (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psubq: +; 
HASWELL: # BB#0: +; HASWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psubq (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psubq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psubq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psubq: +; SKX: # BB#0: +; SKX-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psubq: +; BTVER2: # BB#0: +; BTVER2-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psubq (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psubq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psubq %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psubq (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psubsb: +; GENERIC: # BB#0: +; GENERIC-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: psubsb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd 
%mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psubsb: +; ATOM: # BB#0: +; ATOM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: psubsb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psubsb: +; SLM: # BB#0: +; SLM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: psubsb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psubsb: +; SANDY: # BB#0: +; SANDY-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: psubsb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psubsb: +; HASWELL: # BB#0: +; HASWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psubsb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psubsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psubsb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psubsb: +; SKX: # BB#0: +; SKX-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psubsb: +; BTVER2: # BB#0: +; BTVER2-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: 
retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psubsb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psubsb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psubsb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psubsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: psubsw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psubsw: +; ATOM: # BB#0: +; ATOM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: psubsw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psubsw: +; SLM: # BB#0: +; SLM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: psubsw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psubsw: +; SANDY: # BB#0: +; SANDY-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: psubsw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psubsw: +; HASWELL: # BB#0: +; HASWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psubsw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubsw 
(%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psubsw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psubsw: +; SKX: # BB#0: +; SKX-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psubsw: +; BTVER2: # BB#0: +; BTVER2-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psubsw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psubsw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psubsw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psubusb: +; GENERIC: # BB#0: +; GENERIC-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: psubusb (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psubusb: +; ATOM: # BB#0: +; ATOM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: psubusb (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: 
test_psubusb: +; SLM: # BB#0: +; SLM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: psubusb (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psubusb: +; SANDY: # BB#0: +; SANDY-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: psubusb (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psubusb: +; HASWELL: # BB#0: +; HASWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psubusb (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psubusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubusb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psubusb: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psubusb: +; SKX: # BB#0: +; SKX-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psubusb: +; BTVER2: # BB#0: +; BTVER2-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psubusb: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psubusb %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psubusb (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx 
@llvm.x86.mmx.psubus.b(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psubusw: +; GENERIC: # BB#0: +; GENERIC-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: psubusw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psubusw: +; ATOM: # BB#0: +; ATOM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: psubusw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psubusw: +; SLM: # BB#0: +; SLM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: psubusw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psubusw: +; SANDY: # BB#0: +; SANDY-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: psubusw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psubusw: +; HASWELL: # BB#0: +; HASWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psubusw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psubusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubusw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psubusw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] +; SKYLAKE-NEXT: psubusw (%rdi), 
%mm0 # sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psubusw: +; SKX: # BB#0: +; SKX-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] +; SKX-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psubusw: +; BTVER2: # BB#0: +; BTVER2-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psubusw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psubusw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psubusw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_psubw: +; GENERIC: # BB#0: +; GENERIC-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] +; GENERIC-NEXT: psubw (%rdi), %mm0 # sched: [7:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_psubw: +; ATOM: # BB#0: +; ATOM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: psubw (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_psubw: +; SLM: # BB#0: +; SLM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: psubw (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_psubw: +; SANDY: # BB#0: +; SANDY-NEXT: 
psubw %mm1, %mm0 # sched: [3:1.00] +; SANDY-NEXT: psubw (%rdi), %mm0 # sched: [7:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_psubw: +; HASWELL: # BB#0: +; HASWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; HASWELL-NEXT: psubw (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_psubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_psubw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_psubw: +; SKX: # BB#0: +; SKX-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_psubw: +; BTVER2: # BB#0: +; BTVER2-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: psubw (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_psubw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: psubw %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: psubw (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx 
%a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_punpckhbw: +; GENERIC: # BB#0: +; GENERIC-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] +; GENERIC-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_punpckhbw: +; ATOM: # BB#0: +; ATOM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] +; ATOM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_punpckhbw: +; SLM: # BB#0: +; SLM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] +; SLM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_punpckhbw: +; SANDY: # BB#0: +; SANDY-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] +; SANDY-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_punpckhbw: +; HASWELL: # BB#0: +; HASWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] +; HASWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_punpckhbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpckhbw %mm1, 
%mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] +; BROADWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_punpckhbw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] +; SKYLAKE-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_punpckhbw: +; SKX: # BB#0: +; SKX-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] +; SKX-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_punpckhbw: +; BTVER2: # BB#0: +; BTVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] +; BTVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_punpckhbw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.25] +; ZNVER1-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %1, x86_mmx %2) + %4 = 
bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_punpckhdq: +; GENERIC: # BB#0: +; GENERIC-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] +; GENERIC-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_punpckhdq: +; ATOM: # BB#0: +; ATOM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] +; ATOM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_punpckhdq: +; SLM: # BB#0: +; SLM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] +; SLM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_punpckhdq: +; SANDY: # BB#0: +; SANDY-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] +; SANDY-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_punpckhdq: +; HASWELL: # BB#0: +; HASWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] +; HASWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_punpckhdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] +; BROADWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: 
test_punpckhdq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] +; SKYLAKE-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_punpckhdq: +; SKX: # BB#0: +; SKX-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] +; SKX-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_punpckhdq: +; BTVER2: # BB#0: +; BTVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] +; BTVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_punpckhdq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.25] +; ZNVER1-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_punpckhwd: +; GENERIC: # BB#0: +; GENERIC-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; GENERIC-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_punpckhwd: +; ATOM: # BB#0: +; ATOM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: 
[1:0.50] +; ATOM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_punpckhwd: +; SLM: # BB#0: +; SLM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; SLM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_punpckhwd: +; SANDY: # BB#0: +; SANDY-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; SANDY-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_punpckhwd: +; HASWELL: # BB#0: +; HASWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; HASWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_punpckhwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; BROADWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_punpckhwd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; SKYLAKE-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_punpckhwd: +; SKX: # BB#0: +; SKX-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; SKX-NEXT: punpckhwd (%rdi), %mm0 # mm0 = 
mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_punpckhwd: +; BTVER2: # BB#0: +; BTVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] +; BTVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_punpckhwd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25] +; ZNVER1-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_punpcklbw: +; GENERIC: # BB#0: +; GENERIC-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; GENERIC-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_punpcklbw: +; ATOM: # BB#0: +; ATOM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; ATOM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_punpcklbw: +; SLM: # BB#0: +; SLM-NEXT: punpcklbw %mm1, %mm0 # 
mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; SLM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_punpcklbw: +; SANDY: # BB#0: +; SANDY-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; SANDY-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_punpcklbw: +; HASWELL: # BB#0: +; HASWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; HASWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_punpcklbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; BROADWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_punpcklbw: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; SKYLAKE-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_punpcklbw: +; SKX: # BB#0: +; SKX-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] +; SKX-NEXT: 
punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_punpcklbw: +; BTVER2: # BB#0: +; BTVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] +; BTVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_punpcklbw: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25] +; ZNVER1-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_punpckldq: +; GENERIC: # BB#0: +; GENERIC-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; GENERIC-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_punpckldq: +; ATOM: # BB#0: +; ATOM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; ATOM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_punpckldq: +; SLM: # BB#0: +; SLM-NEXT: punpckldq %mm1, 
%mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; SLM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_punpckldq: +; SANDY: # BB#0: +; SANDY-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; SANDY-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_punpckldq: +; HASWELL: # BB#0: +; HASWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; HASWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_punpckldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; BROADWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_punpckldq: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; SKYLAKE-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_punpckldq: +; SKX: # BB#0: +; SKX-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] +; SKX-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_punpckldq: +; BTVER2: # BB#0: +; BTVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50] +; BTVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; 
ZNVER1-LABEL: test_punpckldq: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.25] +; ZNVER1-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_punpcklwd: +; GENERIC: # BB#0: +; GENERIC-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; GENERIC-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_punpcklwd: +; ATOM: # BB#0: +; ATOM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; ATOM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_punpcklwd: +; SLM: # BB#0: +; SLM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; SLM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_punpcklwd: +; SANDY: # BB#0: +; SANDY-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; SANDY-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_punpcklwd: +; HASWELL: # BB#0: 
+; HASWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; HASWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_punpcklwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; BROADWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_punpcklwd: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; SKYLAKE-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_punpcklwd: +; SKX: # BB#0: +; SKX-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] +; SKX-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_punpcklwd: +; BTVER2: # BB#0: +; BTVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50] +; BTVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_punpcklwd: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.25] +; ZNVER1-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx 
*%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { +; GENERIC-LABEL: test_pxor: +; GENERIC: # BB#0: +; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:1.00] +; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [5:1.00] +; GENERIC-NEXT: movd %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; +; ATOM-LABEL: test_pxor: +; ATOM: # BB#0: +; ATOM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] +; ATOM-NEXT: pxor (%rdi), %mm0 # sched: [1:1.00] +; ATOM-NEXT: movd %mm0, %rax # sched: [3:3.00] +; ATOM-NEXT: retq # sched: [79:39.50] +; +; SLM-LABEL: test_pxor: +; SLM: # BB#0: +; SLM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] +; SLM-NEXT: pxor (%rdi), %mm0 # sched: [4:1.00] +; SLM-NEXT: movd %mm0, %rax # sched: [1:0.50] +; SLM-NEXT: retq # sched: [4:1.00] +; +; SANDY-LABEL: test_pxor: +; SANDY: # BB#0: +; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:1.00] +; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [5:1.00] +; SANDY-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [1:1.00] +; +; HASWELL-LABEL: test_pxor: +; HASWELL: # BB#0: +; HASWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] +; HASWELL-NEXT: pxor (%rdi), %mm0 # sched: [1:0.50] +; HASWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] +; +; BROADWELL-LABEL: test_pxor: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] +; BROADWELL-NEXT: pxor (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; +; SKYLAKE-LABEL: test_pxor: +; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] +; SKYLAKE-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] +; SKYLAKE-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] +; +; SKX-LABEL: test_pxor: +; 
SKX: # BB#0: +; SKX-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] +; SKX-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] +; SKX-NEXT: movd %mm0, %rax # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] +; +; BTVER2-LABEL: test_pxor: +; BTVER2: # BB#0: +; BTVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] +; BTVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00] +; BTVER2-NEXT: movd %mm0, %rax # sched: [1:0.17] +; BTVER2-NEXT: retq # sched: [4:1.00] +; +; ZNVER1-LABEL: test_pxor: +; ZNVER1: # BB#0: +; ZNVER1-NEXT: pxor %mm1, %mm0 # sched: [1:0.25] +; ZNVER1-NEXT: pxor (%rdi), %mm0 # sched: [8:0.50] +; ZNVER1-NEXT: movd %mm0, %rax # sched: [2:1.00] +; ZNVER1-NEXT: retq # sched: [1:0.50] + %1 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a0, x86_mmx %a1) + %2 = load x86_mmx, x86_mmx *%a2, align 8 + %3 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %1, x86_mmx %2) + %4 = bitcast x86_mmx %3 to i64 + ret i64 %4 +} +declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/movbe-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/movbe-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/movbe-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/movbe-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s 
--check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -37,11 +38,17 @@ ; HASWELL-NEXT: movbew %si, (%rdx) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movbe_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movbew (%rdi), %ax # sched: [1:0.50] +; BROADWELL-NEXT: movbew %si, (%rdx) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movbe_i16: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: movbew (%rdi), %ax # sched: [1:0.50] -; SKYLAKE-NEXT: movbew %si, (%rdx) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movbew (%rdi), %ax # sched: [6:0.50] +; SKYLAKE-NEXT: movbew %si, (%rdx) # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movbe_i16: ; BTVER2: # BB#0: @@ -91,11 +98,17 @@ ; HASWELL-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movbe_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movbel (%rdi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movbe_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: movbel (%rdi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movbel (%rdi), %eax # sched: [6:0.50] +; SKYLAKE-NEXT: movbel %esi, (%rdx) # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movbe_i32: ; BTVER2: # BB#0: @@ -145,11 +158,17 @@ ; HASWELL-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movbe_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movbeq (%rdi), %rax # sched: 
[1:0.50] +; BROADWELL-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movbe_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: movbeq (%rdi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: movbeq (%rdi), %rax # sched: [6:0.50] +; SKYLAKE-NEXT: movbeq %rsi, (%rdx) # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movbe_i64: ; BTVER2: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/popcnt-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/popcnt-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/popcnt-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/popcnt-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -43,13 +44,21 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: 
test_ctpop_i16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: popcntw (%rsi), %cx # sched: [3:1.00] +; BROADWELL-NEXT: popcntw %di, %ax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctpop_i16: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: popcntw (%rsi), %cx # sched: [3:1.00] +; SKYLAKE-NEXT: popcntw (%rsi), %cx # sched: [8:1.00] ; SKYLAKE-NEXT: popcntw %di, %ax # sched: [3:1.00] ; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: # kill: %AX %AX %EAX -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ctpop_i16: ; BTVER2: # BB#0: @@ -103,12 +112,19 @@ ; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctpop_i32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: popcntl (%rsi), %ecx # sched: [3:1.00] +; BROADWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00] +; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctpop_i32: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: popcntl (%rsi), %ecx # sched: [3:1.00] +; SKYLAKE-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00] ; SKYLAKE-NEXT: popcntl %edi, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ctpop_i32: ; BTVER2: # BB#0: @@ -160,12 +176,19 @@ ; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ctpop_i64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: popcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00] +; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ctpop_i64: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: popcntq (%rsi), %rcx # sched: [3:1.00] +; 
SKYLAKE-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00] ; SKYLAKE-NEXT: popcntq %rdi, %rax # sched: [3:1.00] ; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ctpop_i64: ; BTVER2: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse2-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse2-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse2-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse2-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -41,11 +42,17 @@ ; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; 
SKYLAKE-LABEL: test_addpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addpd: ; SKX: # BB#0: @@ -101,11 +108,17 @@ ; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsd: ; SKX: # BB#0: @@ -166,12 +179,19 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andpd: ; SKX: # BB#0: @@ -240,12 +260,19 @@ ; 
HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andnotpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotpd: ; SKX: # BB#0: @@ -316,12 +343,19 @@ ; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmppd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmppd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33] -; SKYLAKE-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmppd: ; SKX: # BB#0: @@ -384,11 +418,17 @@ ; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmpsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmpsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpsd: ; SKX: # BB#0: @@ -489,19 +529,33 @@ ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_comisd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_comisd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:1.00] -; SKYLAKE-NEXT: sete %cl # sched: [1:1.00] +; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] ; SKYLAKE-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:1.00] -; SKYLAKE-NEXT: sete %dl # sched: [1:1.00] +; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_comisd: ; 
SKX: # BB#0: @@ -588,12 +642,19 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtdq2pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [5:1.00] +; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtdq2pd: ; SKX: # BB#0: @@ -661,12 +722,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtdq2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [4:0.50] +; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtdq2ps: ; SKX: # BB#0: @@ -732,12 +800,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, 
%xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] ; SKYLAKE-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpd2dq: ; SKX: # BB#0: @@ -804,12 +879,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtpd2ps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2ps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] ; SKYLAKE-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpd2ps: ; SKX: # BB#0: @@ -876,12 +958,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtps2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtps2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [4:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] +; 
SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtps2dq: ; SKX: # BB#0: @@ -948,12 +1037,19 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtps2pd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtps2pd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [4:0.50] +; SKYLAKE-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtps2pd: ; SKX: # BB#0: @@ -1020,12 +1116,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsd2si: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2si: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtsd2si (%rdi), %eax # sched: [6:1.00] +; SKYLAKE-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsd2si: ; SKX: # BB#0: @@ -1093,12 +1196,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsd2siq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] +; 
BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2siq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtsd2si (%rdi), %rax # sched: [6:1.00] +; SKYLAKE-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsd2siq: ; SKX: # BB#0: @@ -1172,13 +1282,21 @@ ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsd2ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKYLAKE-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsd2ss: ; SKX: # BB#0: @@ -1246,12 +1364,19 @@ ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsi2sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: 
test_cvtsi2sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] ; SKYLAKE-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2sd: ; SKX: # BB#0: @@ -1316,12 +1441,19 @@ ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsi2sdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2sdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] ; SKYLAKE-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2sdq: ; SKX: # BB#0: @@ -1394,13 +1526,21 @@ ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtss2sd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] +; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtss2sd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKYLAKE-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] ; 
SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtss2sd: ; SKX: # BB#0: @@ -1469,12 +1609,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttpd2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttpd2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] ; SKYLAKE-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttpd2dq: ; SKX: # BB#0: @@ -1542,12 +1689,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttps2dq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttps2dq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [4:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttps2dq: ; SKX: # BB#0: @@ -1612,12 +1766,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; 
HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttsd2si: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttsd2si: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvttsd2si (%rdi), %eax # sched: [6:1.00] +; SKYLAKE-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttsd2si: ; SKX: # BB#0: @@ -1682,12 +1843,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttsd2siq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttsd2siq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvttsd2si (%rdi), %rax # sched: [6:1.00] +; SKYLAKE-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttsd2siq: ; SKX: # BB#0: @@ -1747,11 +1915,17 @@ ; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] -; 
SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_divpd: ; SKX: # BB#0: @@ -1807,11 +1981,17 @@ ; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] -; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_divsd: ; SKX: # BB#0: @@ -1868,10 +2048,15 @@ ; HASWELL-NEXT: lfence # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lfence: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: lfence # sched: [2:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lfence: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: lfence # sched: [2:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_lfence: ; SKX: # BB#0: @@ -1924,10 +2109,15 @@ ; HASWELL-NEXT: mfence # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mfence: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: mfence # sched: [2:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mfence: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: mfence # sched: [2:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: mfence # sched: [3:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mfence: ; SKX: # BB#0: @@ -1978,10 +2168,15 @@ ; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 
# sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maskmovdqu: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maskmovdqu: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maskmovdqu: ; SKX: # BB#0: @@ -2033,11 +2228,17 @@ ; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxpd: ; SKX: # BB#0: @@ -2094,11 +2295,17 @@ ; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxsd: ; SKX: # BB#0: @@ -2155,11 +2362,17 @@ ; HASWELL-NEXT: vminpd 
(%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minpd: ; SKX: # BB#0: @@ -2216,11 +2429,17 @@ ; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minsd: ; SKX: # BB#0: @@ -2282,12 +2501,19 @@ ; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movapd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movapd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: 
[4:0.50] ; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movapd: ; SKX: # BB#0: @@ -2351,12 +2577,19 @@ ; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movdqa: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movdqa: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movdqa: ; SKX: # BB#0: @@ -2420,12 +2653,19 @@ ; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movdqu: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movdqu: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movdqu: ; SKX: # BB#0: @@ -2504,15 +2744,25 @@ ; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: 
[1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] +; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movd: ; SKX: # BB#0: @@ -2605,15 +2855,25 @@ ; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movd_64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movd_64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: 
[1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] ; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movd_64: ; SKX: # BB#0: @@ -2691,12 +2951,19 @@ ; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movhpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movhpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movhpd: ; SKX: # BB#0: @@ -2763,12 +3030,19 @@ ; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movlpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movlpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] 
sched: [6:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlpd: ; SKX: # BB#0: @@ -2827,10 +3101,15 @@ ; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movmskpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movmskpd: ; SKX: # BB#0: @@ -2884,11 +3163,17 @@ ; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntdqa: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntdqa: ; SKX: # BB#0: @@ -2943,11 +3228,17 @@ ; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; 
SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntpd: ; SKX: # BB#0: @@ -3007,12 +3298,19 @@ ; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movq_mem: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movq_mem: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movq_mem: ; SKX: # BB#0: @@ -3075,11 +3373,17 @@ ; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movq_reg: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movq_reg: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movq_reg: ; SKX: # BB#0: @@ -3139,12 +3443,19 @@ ; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movsd_mem: +; BROADWELL: # BB#0: +; 
BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movsd_mem: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50] +; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] ; SKYLAKE-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movsd_mem: ; SKX: # BB#0: @@ -3205,10 +3516,15 @@ ; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movsd_reg: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movsd_reg: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movsd_reg: ; SKX: # BB#0: @@ -3264,12 +3580,19 @@ ; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movupd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movupd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; 
SKX-LABEL: test_movupd: ; SKX: # BB#0: @@ -3328,11 +3651,17 @@ ; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulpd: ; SKX: # BB#0: @@ -3388,11 +3717,17 @@ ; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulsd: ; SKX: # BB#0: @@ -3453,12 +3788,19 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_orpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_orpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
SKYLAKE-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_orpd: ; SKX: # BB#0: @@ -3526,11 +3868,17 @@ ; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packssdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packssdw: ; SKX: # BB#0: @@ -3592,11 +3940,17 @@ ; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packsswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packsswb: ; SKX: # BB#0: @@ -3658,11 +4012,17 @@ ; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: 
test_packuswb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packuswb: ; SKX: # BB#0: @@ -3724,11 +4084,17 @@ ; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddb: ; SKX: # BB#0: @@ -3788,11 +4154,17 @@ ; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; 
SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddd: ; SKX: # BB#0: @@ -3848,11 +4220,17 @@ ; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddq: ; SKX: # BB#0: @@ -3912,11 +4290,17 @@ ; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddsb: ; SKX: # BB#0: @@ -3977,11 +4361,17 @@ ; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # 
sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddsw: ; SKX: # BB#0: @@ -4042,11 +4432,17 @@ ; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddusb: ; SKX: # BB#0: @@ -4107,11 +4503,17 @@ ; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; 
SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddusw: ; SKX: # BB#0: @@ -4172,11 +4574,17 @@ ; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_paddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_paddw: ; SKX: # BB#0: @@ -4237,12 +4645,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pand: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pand: ; SKX: # BB#0: @@ -4313,12 +4728,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pandn: +; BROADWELL: # BB#0: +; 
BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pandn: ; SKX: # BB#0: @@ -4384,11 +4806,17 @@ ; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pavgb: ; SKX: # BB#0: @@ -4458,11 +4886,17 @@ ; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pavgw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, 
%xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pavgw: ; SKX: # BB#0: @@ -4535,12 +4969,19 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqb: ; SKX: # BB#0: @@ -4609,12 +5050,19 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: 
[2:1.00] +; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqd: ; SKX: # BB#0: @@ -4683,12 +5131,19 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqw: ; SKX: # BB#0: @@ -4758,12 +5213,19 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 
# sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtb: ; SKX: # BB#0: @@ -4833,12 +5295,19 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtd: ; SKX: # BB#0: @@ -4908,12 +5377,19 @@ ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpgtw (%rdi), 
%xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtw: ; SKX: # BB#0: @@ -4975,11 +5451,17 @@ ; HASWELL-NEXT: # kill: %AX %AX %EAX ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: # kill: %AX %AX %EAX +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: # kill: %AX %AX %EAX -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pextrw: ; SKX: # BB#0: @@ -5037,11 +5519,17 @@ ; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pinsrw: ; SKX: # BB#0: @@ -5105,11 +5593,17 @@ ; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmaddwd (%rdi), 
%xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddwd: ; SKX: # BB#0: @@ -5171,11 +5665,17 @@ ; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxsw: ; SKX: # BB#0: @@ -5236,11 +5736,17 @@ ; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxub: ; SKX: # BB#0: @@ -5301,11 +5807,17 @@ ; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: 
vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminsw: ; SKX: # BB#0: @@ -5366,11 +5878,17 @@ ; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminub: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminub: ; SKX: # BB#0: @@ -5424,10 +5942,15 @@ ; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovmskb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovmskb: ; SKX: # BB#0: @@ -5479,11 +6002,17 @@ ; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: 
[2:1.00] ; +; BROADWELL-LABEL: test_pmulhuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhuw: ; SKX: # BB#0: @@ -5540,11 +6069,17 @@ ; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhw: ; SKX: # BB#0: @@ -5601,11 +6136,17 @@ ; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmullw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # 
sched: [7:1.00] ; ; SKX-LABEL: test_pmullw: ; SKX: # BB#0: @@ -5669,11 +6210,17 @@ ; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuludq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuludq: ; SKX: # BB#0: @@ -5736,12 +6283,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_por: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_por: ; SKX: # BB#0: @@ -5809,11 +6363,17 @@ ; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; 
BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psadbw: ; SKX: # BB#0: @@ -5878,12 +6438,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] +; BROADWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshufd: ; SKX: # BB#0: @@ -5950,12 +6517,19 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufhw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufhw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; 
SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] +; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshufhw: ; SKX: # BB#0: @@ -6022,12 +6596,19 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshuflw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshuflw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] +; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshuflw: ; SKX: # BB#0: @@ -6092,12 +6673,19 @@ ; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpslld 
$2, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pslld: ; SKX: # BB#0: @@ -6160,10 +6748,15 @@ ; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pslldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pslldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pslldq: ; SKX: # BB#0: @@ -6219,12 +6812,19 @@ ; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllq: ; SKX: # BB#0: @@ -6291,12 +6891,19 @@ ; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psllw: +; BROADWELL: 
# BB#0: +; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psllw: ; SKX: # BB#0: @@ -6363,12 +6970,19 @@ ; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrad: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrad: ; SKX: # BB#0: @@ -6435,12 +7049,19 @@ ; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psraw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # 
sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psraw: ; SKX: # BB#0: @@ -6507,12 +7128,19 @@ ; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrld: ; SKX: # BB#0: @@ -6575,10 +7203,15 @@ ; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # 
sched: [7:1.00] ; ; SKX-LABEL: test_psrldq: ; SKX: # BB#0: @@ -6634,12 +7267,19 @@ ; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlq: ; SKX: # BB#0: @@ -6706,12 +7346,19 @@ ; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psrlw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psrlw: ; SKX: # BB#0: @@ -6777,11 +7424,17 @@ ; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubb: +; 
BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubb: ; SKX: # BB#0: @@ -6841,11 +7494,17 @@ ; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubd: ; SKX: # BB#0: @@ -6901,11 +7560,17 @@ ; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 
# sched: [1:0.33] +; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubq: ; SKX: # BB#0: @@ -6965,11 +7630,17 @@ ; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubsb: ; SKX: # BB#0: @@ -7030,11 +7701,17 @@ ; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubsw: ; SKX: # BB#0: @@ -7095,11 +7772,17 @@ ; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; 
BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubusb: ; SKX: # BB#0: @@ -7160,11 +7843,17 @@ ; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubusw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubusw: ; SKX: # BB#0: @@ -7225,11 +7914,17 @@ ; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpsubw (%rdi), 
%xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psubw: ; SKX: # BB#0: @@ -7289,11 +7984,17 @@ ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhbw: ; SKX: # BB#0: @@ -7356,12 +8057,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] 
+; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhdq: ; SKX: # BB#0: @@ -7426,12 +8134,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhqdq: ; SKX: # BB#0: @@ -7495,11 +8210,17 @@ ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckhwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = 
xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckhwd: ; SKX: # BB#0: @@ -7559,11 +8280,17 @@ ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpcklbw 
{{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpcklbw: ; SKX: # BB#0: @@ -7626,12 +8353,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpckldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpckldq: ; SKX: # BB#0: @@ -7696,12 +8430,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: 
[1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpcklqdq: ; SKX: # BB#0: @@ -7765,11 +8506,17 @@ ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_punpcklwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_punpcklwd: ; SKX: # BB#0: @@ -7830,12 +8577,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pxor: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pxor: ; SKX: # BB#0: @@ -7900,12 +8654,19 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shufpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_shufpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] +; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_shufpd: ; SKX: # BB#0: @@ -7971,12 +8732,19 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] +; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00] -; SKYLAKE-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [18:1.00] +; SKYLAKE-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtpd: ; SKX: # 
BB#0: @@ -8049,13 +8817,21 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] +; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] -; SKYLAKE-NEXT: vmovapd (%rdi), %xmm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] ; SKYLAKE-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtsd: ; SKX: # BB#0: @@ -8119,11 +8895,17 @@ ; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subpd: ; SKX: # BB#0: @@ -8179,11 +8961,17 @@ ; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subsd: ; SKX: # BB#0: @@ -8279,19 +9067,33 @@ ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ucomisd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ucomisd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:1.00] -; SKYLAKE-NEXT: sete %cl # sched: [1:1.00] +; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] ; SKYLAKE-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:1.00] -; SKYLAKE-NEXT: sete %dl # sched: [1:1.00] +; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_ucomisd: ; SKX: # BB#0: @@ -8378,12 +9180,19 @@ ; 
HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpckhpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpckhpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpckhpd: ; SKX: # BB#0: @@ -8454,12 +9263,19 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpcklpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00] +; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpcklpd: ; SKX: # BB#0: @@ -8524,12 +9340,19 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_xorpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vxorpd %xmm1, 
%xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_xorpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_xorpd: ; SKX: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse3-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse3-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse3-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse3-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s 
--check-prefix=CHECK --check-prefix=BTVER2 @@ -41,11 +42,17 @@ ; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsubpd: ; SKX: # BB#0: @@ -102,11 +109,17 @@ ; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsubps: ; SKX: # BB#0: @@ -163,11 +176,17 @@ ; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_haddpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_haddpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; 
SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_haddpd: ; SKX: # BB#0: @@ -224,11 +243,17 @@ ; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_haddps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_haddps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_haddps: ; SKX: # BB#0: @@ -285,11 +310,17 @@ ; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_hsubpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_hsubpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_hsubpd: ; SKX: # BB#0: @@ -346,11 +377,17 @@ ; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_hsubps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: 
[5:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_hsubps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_hsubps: ; SKX: # BB#0: @@ -404,10 +441,15 @@ ; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_lddqu: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_lddqu: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_lddqu: ; SKX: # BB#0: @@ -464,12 +506,19 @@ ; HASWELL-NEXT: monitor # sched: [100:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_monitor: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] +; BROADWELL-NEXT: monitor # sched: [100:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_monitor: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: monitor # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_monitor: ; SKX: # BB#0: @@ -533,12 +582,19 @@ ; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movddup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] +; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50] +; BROADWELL-NEXT: 
vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movddup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50] +; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movddup: ; SKX: # BB#0: @@ -604,12 +660,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movshdup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] +; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movshdup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50] +; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movshdup: ; SKX: # BB#0: @@ -675,12 +738,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movsldup: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] +; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movsldup: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SKYLAKE-NEXT: 
vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50] +; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movsldup: ; SKX: # BB#0: @@ -745,12 +815,19 @@ ; HASWELL-NEXT: mwait # sched: [20:2.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mwait: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] +; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: mwait # sched: [20:2.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mwait: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: mwait # sched: [20:2.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mwait: ; SKX: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse41-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse41-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse41-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse41-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule 
-mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -38,12 +39,19 @@ ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendpd: ; SKX: # BB#0: @@ -98,11 +106,17 @@ ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; 
SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendps: ; SKX: # BB#0: @@ -158,11 +172,17 @@ ; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendvpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendvpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendvpd: ; SKX: # BB#0: @@ -219,11 +239,17 @@ ; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_blendvps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_blendvps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendvps: ; SKX: # BB#0: @@ -274,11 +300,17 @@ ; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_dppd: +; BROADWELL: # BB#0: +; 
BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_dppd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_dppd: ; SKX: # BB#0: @@ -329,11 +361,17 @@ ; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_dpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] +; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_dpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33] -; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.33] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_dpps: ; SKX: # BB#0: @@ -384,11 +422,17 @@ ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_insertps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_insertps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: 
[7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_insertps: ; SKX: # BB#0: @@ -435,10 +479,15 @@ ; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntdqa: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntdqa: ; SKX: # BB#0: @@ -484,11 +533,17 @@ ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mpsadbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] +; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mpsadbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [4:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mpsadbw: ; SKX: # BB#0: @@ -540,11 +595,17 @@ ; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_packusdw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_packusdw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # 
sched: [2:1.00] +; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_packusdw: ; SKX: # BB#0: @@ -602,11 +663,17 @@ ; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendvb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendvb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pblendvb: ; SKX: # BB#0: @@ -657,11 +724,17 @@ ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pblendw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pblendw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pblendw: ; SKX: # BB#0: @@ -711,11 +784,17 @@ ; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] 
; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpeqq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqq: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpeqq: ; SKX: # BB#0: @@ -769,11 +848,17 @@ ; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pextrb: ; SKX: # BB#0: @@ -824,11 +909,17 @@ ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpextrd $1, %xmm0, 
(%rdi) # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pextrd: ; SKX: # BB#0: @@ -878,11 +969,17 @@ ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] +; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pextrq: ; SKX: # BB#0: @@ -932,11 +1029,17 @@ ; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pextrw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pextrw: ; SKX: # BB#0: @@ -987,11 +1090,17 @@ ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phminposuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phminposuw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [4:0.50] +; SKYLAKE-NEXT: 
vphminposuw (%rdi), %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phminposuw: ; SKX: # BB#0: @@ -1042,11 +1151,17 @@ ; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pinsrb: ; SKX: # BB#0: @@ -1096,11 +1211,17 @@ ; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pinsrd: ; SKX: # BB#0: @@ -1154,12 +1275,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pinsrq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] +; BROADWELL-NEXT: vpinsrq $1, 
(%rsi), %xmm1, %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pinsrq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pinsrq: ; SKX: # BB#0: @@ -1213,11 +1341,17 @@ ; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxsb: ; SKX: # BB#0: @@ -1268,11 +1402,17 @@ ; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxsd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: 
[2:1.00] +; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxsd: ; SKX: # BB#0: @@ -1323,11 +1463,17 @@ ; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxud: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxud: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxud: ; SKX: # BB#0: @@ -1378,11 +1524,17 @@ ; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaxuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaxuw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaxuw: ; SKX: # BB#0: @@ -1433,11 +1585,17 @@ ; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsb 
%xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminsb: ; SKX: # BB#0: @@ -1488,11 +1646,17 @@ ; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminsd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminsd: ; SKX: # BB#0: @@ -1543,11 +1707,17 @@ ; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminud: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminud: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 
+; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminud: ; SKX: # BB#0: @@ -1598,11 +1768,17 @@ ; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pminuw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pminuw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pminuw: ; SKX: # BB#0: @@ -1658,12 +1834,19 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] +; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxbw: ; SKX: # BB#0: @@ -1723,12 +1906,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbd: +; BROADWELL: # 
BB#0: +; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxbd: ; SKX: # BB#0: @@ -1788,12 +1978,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxbq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxbq: ; SKX: # BB#0: @@ -1853,12 +2050,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: 
[2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxdq: ; SKX: # BB#0: @@ -1918,12 +2122,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxwd: ; SKX: # BB#0: @@ -1983,12 +2194,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovsxwq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 
# sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovsxwq: ; SKX: # BB#0: @@ -2048,12 +2266,19 @@ ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxbw: ; SKX: # BB#0: @@ -2113,12 +2338,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; 
BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxbd: ; SKX: # BB#0: @@ -2178,12 +2410,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxbq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: 
retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxbq: ; SKX: # BB#0: @@ -2243,12 +2482,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxdq: ; SKX: # BB#0: @@ -2308,12 +2554,19 @@ ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxwd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: 
[1:1.00] -; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxwd: ; SKX: # BB#0: @@ -2373,12 +2626,19 @@ ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmovzxwq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmovzxwq: ; SKX: # BB#0: @@ -2433,11 +2693,17 @@ ; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmuldq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmuldq: ; SKYLAKE: # BB#0: ; 
SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuldq: ; SKX: # BB#0: @@ -2489,11 +2755,17 @@ ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulld: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] +; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulld: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67] -; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [14:0.67] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulld: ; SKX: # BB#0: @@ -2559,15 +2831,25 @@ ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ptest: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: setb %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ptest: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:1.00] -; SKYLAKE-NEXT: vptest (%rdi), %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: setb %cl # sched: [1:1.00] +; SKYLAKE-NEXT: setb %al # sched: [1:0.50] +; SKYLAKE-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00] +; SKYLAKE-NEXT: setb %cl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %cl # sched: 
[1:0.25] ; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_ptest: ; SKX: # BB#0: @@ -2636,12 +2918,19 @@ ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundpd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:1.25] +; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:2.00] +; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundpd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67] -; SKYLAKE-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:0.67] +; SKYLAKE-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:0.67] ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundpd: ; SKX: # BB#0: @@ -2701,12 +2990,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:1.25] +; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:2.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67] -; SKYLAKE-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:0.67] +; SKYLAKE-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:0.67] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundps: ; SKX: # BB#0: @@ -2767,12 +3063,19 @@ ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; 
BROADWELL-LABEL: test_roundsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25] +; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundsd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] -; SKYLAKE-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKYLAKE-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundsd: ; SKX: # BB#0: @@ -2833,12 +3136,19 @@ ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_roundss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25] +; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_roundss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] -; SKYLAKE-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKYLAKE-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67] ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundss: ; SKX: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse42-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse42-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse42-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse42-schedule.ll 2017-10-17 14:41:15.000000000 +0000 
@@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -38,12 +39,19 @@ ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_32_8: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_32_8: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_8: ; SKX: # BB#0: @@ -101,12 +109,19 @@ ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_32_16: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] +; BROADWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: movl %edi, %eax # sched: 
[1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_32_16: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32w %si, %edi # sched: [3:1.00] ; SKYLAKE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_16: ; SKX: # BB#0: @@ -164,12 +179,19 @@ ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_32_32: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; BROADWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_32_32: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; SKYLAKE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_32_32: ; SKX: # BB#0: @@ -227,12 +249,19 @@ ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_64_8: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_64_8: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; SKYLAKE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_8: ; SKX: # BB#0: @@ -290,12 +319,19 @@ ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: crc32_64_64: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: crc32q %rsi, %rdi # 
sched: [3:1.00] +; BROADWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] +; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: crc32_64_64: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; SKYLAKE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: crc32_64_64: ; SKX: # BB#0: @@ -377,6 +413,19 @@ ; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpestri: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] +; BROADWELL-NEXT: movl %ecx, %esi # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00] +; BROADWELL-NEXT: # kill: %ECX %ECX %RCX +; BROADWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpestri: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] @@ -385,10 +434,10 @@ ; SKYLAKE-NEXT: movl %ecx, %esi # sched: [1:0.25] ; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00] +; SKYLAKE-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] ; SKYLAKE-NEXT: # kill: %ECX %ECX %RCX ; SKYLAKE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpestri: ; SKX: # BB#0: @@ -477,6 +526,16 @@ ; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpestrm: +; 
BROADWELL: # BB#0: +; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] +; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpestrm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] @@ -484,8 +543,8 @@ ; SKYLAKE-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] ; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpestrm: ; SKX: # BB#0: @@ -560,14 +619,23 @@ ; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpistri: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: movl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: # kill: %ECX %ECX %RCX +; BROADWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpistri: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] ; SKYLAKE-NEXT: movl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [10:3.00] +; SKYLAKE-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] ; SKYLAKE-NEXT: # kill: %ECX %ECX %RCX ; SKYLAKE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpistri: ; SKX: # BB#0: @@ -628,11 +696,17 @@ ; 
HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpistrm: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpistrm: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKYLAKE-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpistrm: ; SKX: # BB#0: @@ -683,11 +757,17 @@ ; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pcmpgtq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pcmpgtq: ; SKX: # BB#0: @@ -741,11 +821,17 @@ ; HASWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pclmulqdq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00] +; BROADWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pclmulqdq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] -; SKYLAKE-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # 
sched: [6:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pclmulqdq: ; SKX: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/sse-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/sse-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -41,11 +42,17 @@ ; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, 
%xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addps: ; SKX: # BB#0: @@ -101,11 +108,17 @@ ; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_addss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_addss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addss: ; SKX: # BB#0: @@ -165,11 +178,17 @@ ; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andps: ; SKX: # BB#0: @@ -233,11 +252,17 @@ ; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_andnotps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: 
[1:1.00] +; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotps: ; SKX: # BB#0: @@ -304,12 +329,19 @@ ; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33] -; SKYLAKE-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpps: ; SKX: # BB#0: @@ -372,11 +404,17 @@ ; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cmpss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cmpss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; 
SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpss: ; SKX: # BB#0: @@ -477,19 +515,33 @@ ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_comiss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_comiss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:1.00] -; SKYLAKE-NEXT: sete %cl # sched: [1:1.00] +; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] ; SKYLAKE-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:1.00] -; SKYLAKE-NEXT: sete %dl # sched: [1:1.00] +; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_comiss: ; SKX: # BB#0: @@ -576,12 +628,19 @@ ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsi2ss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; 
BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2ss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] ; SKYLAKE-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2ss: ; SKX: # BB#0: @@ -646,12 +705,19 @@ ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtsi2ssq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] +; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2ssq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2ssq: ; SKX: # BB#0: @@ -716,12 +782,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtss2si: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtss2si: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [6:1.00] +; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # 
sched: [7:1.00] ; ; SKX-LABEL: test_cvtss2si: ; SKX: # BB#0: @@ -789,12 +862,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvtss2siq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvtss2siq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [6:1.00] +; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtss2siq: ; SKX: # BB#0: @@ -862,12 +942,19 @@ ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttss2si: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttss2si: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] -; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [6:1.00] +; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00] ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttss2si: ; SKX: # BB#0: @@ -932,12 +1019,19 @@ ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_cvttss2siq: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] +; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_cvttss2siq: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] -; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [6:1.00] +; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00] ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttss2siq: ; SKX: # BB#0: @@ -997,11 +1091,17 @@ ; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] -; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_divps: ; SKX: # BB#0: @@ -1057,11 +1157,17 @@ ; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_divss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_divss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00] -; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_divss: ; SKX: # BB#0: @@ -1117,11 +1223,17 @@ ; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: 
[2:1.00] ; +; BROADWELL-LABEL: test_ldmxcsr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ldmxcsr: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_ldmxcsr: ; SKX: # BB#0: @@ -1179,11 +1291,17 @@ ; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxps: ; SKX: # BB#0: @@ -1240,11 +1358,17 @@ ; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_maxss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_maxss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; 
SKX-LABEL: test_maxss: ; SKX: # BB#0: @@ -1301,11 +1425,17 @@ ; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minps: ; SKX: # BB#0: @@ -1362,11 +1492,17 @@ ; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_minss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_minss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minss: ; SKX: # BB#0: @@ -1428,12 +1564,19 @@ ; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movaps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movaps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovaps 
(%rdi), %xmm0 # sched: [6:0.50] ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movaps: ; SKX: # BB#0: @@ -1495,10 +1638,15 @@ ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movhlps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movhlps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movhlps: ; SKX: # BB#0: @@ -1558,12 +1706,19 @@ ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movhps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movhps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movhps: ; SKX: # BB#0: @@ -1628,11 +1783,17 @@ ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movlhps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = 
xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movlhps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlhps: ; SKX: # BB#0: @@ -1692,12 +1853,19 @@ ; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movlps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movlps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlps: ; SKX: # BB#0: @@ -1757,10 +1925,15 @@ ; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movmskps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movmskps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movmskps: ; SKX: # BB#0: @@ -1813,10 +1986,15 @@ ; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movntps: +; BROADWELL: # BB#0: +; 
BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movntps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntps: ; SKX: # BB#0: @@ -1872,12 +2050,19 @@ ; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movss_mem: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movss_mem: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] ; SKYLAKE-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movss_mem: ; SKX: # BB#0: @@ -1937,10 +2122,15 @@ ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_movss_reg: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movss_reg: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movss_reg: ; SKX: # BB#0: @@ -1996,12 +2186,19 @@ ; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; 
BROADWELL-LABEL: test_movups: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_movups: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movups: ; SKX: # BB#0: @@ -2060,11 +2257,17 @@ ; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulps: ; SKX: # BB#0: @@ -2120,11 +2323,17 @@ ; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_mulss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_mulss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vmulss (%rdi), 
%xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulss: ; SKX: # BB#0: @@ -2184,11 +2393,17 @@ ; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_orps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_orps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_orps: ; SKX: # BB#0: @@ -2249,10 +2464,15 @@ ; HASWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_prefetchnta: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_prefetchnta: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_prefetchnta: ; SKX: # BB#0: @@ -2311,12 +2531,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rcpps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rcpps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: 
[4:1.00] +; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rcpps: ; SKX: # BB#0: @@ -2389,13 +2616,21 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rcpss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rcpss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKYLAKE-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rcpss: ; SKX: # BB#0: @@ -2468,12 +2703,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rsqrtps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rsqrtps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [4:1.00] +; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; 
SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rsqrtps: ; SKX: # BB#0: @@ -2546,13 +2788,21 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_rsqrtss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_rsqrtss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKYLAKE-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rsqrtss: ; SKX: # BB#0: @@ -2619,10 +2869,15 @@ ; HASWELL-NEXT: sfence # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sfence: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: sfence # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sfence: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: sfence # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: sfence # sched: [2:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sfence: ; SKX: # BB#0: @@ -2678,11 +2933,17 @@ ; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_shufps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: 
[2:1.00] +; ; SKYLAKE-LABEL: test_shufps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_shufps: ; SKX: # BB#0: @@ -2744,12 +3005,19 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] +; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00] -; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [12:1.00] +; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:1.00] ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtps: ; SKX: # BB#0: @@ -2822,13 +3090,21 @@ ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_sqrtss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] +; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00] +; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_sqrtss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00] -; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50] +; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] ; SKYLAKE-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00] ; 
SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtss: ; SKX: # BB#0: @@ -2892,11 +3168,17 @@ ; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_stmxcsr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_stmxcsr: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_stmxcsr: ; SKX: # BB#0: @@ -2954,11 +3236,17 @@ ; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subps: ; SKX: # BB#0: @@ -3014,11 +3302,17 @@ ; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_subss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_subss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subss: ; SKX: # BB#0: @@ -3114,19 +3408,33 @@ ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_ucomiss: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_ucomiss: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:1.00] -; SKYLAKE-NEXT: sete %cl # sched: [1:1.00] +; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] ; SKYLAKE-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:1.00] -; SKYLAKE-NEXT: sete %dl # sched: [1:1.00] +; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_ucomiss: ; SKX: # BB#0: @@ -3212,11 +3520,17 @@ ; 
HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpckhps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpckhps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpckhps: ; SKX: # BB#0: @@ -3276,11 +3590,17 @@ ; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_unpcklps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_unpcklps: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpcklps: ; SKX: # BB#0: @@ -3340,11 +3660,17 @@ ; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_xorps: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; 
BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_xorps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_xorps: ; SKX: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/ssse3-schedule.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/ssse3-schedule.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/ssse3-schedule.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/ssse3-schedule.ll 2017-10-17 14:41:15.000000000 +0000 @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -47,12 +48,19 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] 
; +; BROADWELL-LABEL: test_pabsb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsb: ; SKX: # BB#0: @@ -119,12 +127,19 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsd: ; SKX: # BB#0: @@ -191,12 +206,19 @@ ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pabsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [1:0.50] +; 
BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpabsw (%rdi), %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] +; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pabsw: ; SKX: # BB#0: @@ -262,11 +284,17 @@ ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_palignr: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] +; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_palignr: ; SKX: # BB#0: @@ -322,11 +350,17 @@ ; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # 
BB#0: ; SKYLAKE-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddd: ; SKX: # BB#0: @@ -383,11 +417,17 @@ ; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddsw: ; SKX: # BB#0: @@ -444,11 +484,17 @@ ; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phaddw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phaddw: ; SKX: # BB#0: @@ -505,11 +551,17 @@ ; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # 
sched: [3:2.00] +; BROADWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubd: ; SKX: # BB#0: @@ -566,11 +618,17 @@ ; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubsw: ; SKX: # BB#0: @@ -627,11 +685,17 @@ ; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_phsubw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phsubw: ; SKX: # BB#0: @@ -688,11 +752,17 @@ ; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, 
%xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmaddubsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddubsw: ; SKX: # BB#0: @@ -750,11 +820,17 @@ ; HASWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pmulhrsw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pmulhrsw: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKYLAKE-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhrsw: ; SKX: # BB#0: @@ -811,11 +887,17 @@ ; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_pshufb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # BB#0: ; SKYLAKE-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; 
SKYLAKE-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pshufb: ; SKX: # BB#0: @@ -876,11 +958,17 @@ ; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignb: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignb: ; SKX: # BB#0: @@ -941,11 +1029,17 @@ ; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignd: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignd: ; SKX: # BB#0: @@ -1006,11 +1100,17 @@ ; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [2:1.00] ; +; BROADWELL-LABEL: test_psignw: +; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpsignw (%rdi), 
%xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [2:1.00] +; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [2:1.00] +; SKYLAKE-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_psignw: ; SKX: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vec_minmax_match.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vec_minmax_match.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vec_minmax_match.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vec_minmax_match.ll 2017-10-17 14:41:15.000000000 +0000 @@ -219,3 +219,33 @@ ret <4 x i32> %r } +define <4 x i32> @wrong_pred_for_smin_with_not(<4 x i32> %x) { +; CHECK-LABEL: wrong_pred_for_smin_with_not: +; CHECK: # BB#0: +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm1 +; CHECK-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [4294967291,4294967291,4294967291,4294967291] +; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq + %not_x = xor <4 x i32> %x, + %cmp = icmp ugt <4 x i32> %x, + %sel = select <4 x i1> %cmp, <4 x i32> %not_x, <4 x i32> + ret <4 x i32> %sel +} + +define <4 x i32> @wrong_pred_for_smin_with_subnsw(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: wrong_pred_for_smin_with_subnsw: +; CHECK: # BB#0: +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 +; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: retq + %sub = sub nsw <4 x i32> %x, %y + %cmp = icmp ugt <4 x i32> %x, %y + %sel = select <4 x i1> %cmp, <4 x i32> 
zeroinitializer, <4 x i32> %sub + ret <4 x i32> %sel +} + diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-128-v16.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-128-v16.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-128-v16.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-128-v16.ll 2017-10-17 14:41:15.000000000 +0000 @@ -1215,6 +1215,28 @@ ret <16 x i8> %shuffle } +define <16 x i8> @shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<8 x i16> %a0, <8 x i16> %a1) { +; SSE-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: +; SSE: # BB#0: +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: packuswb %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = lshr <8 x i16> %a0, + %2 = lshr <8 x i16> %a1, + %3 = bitcast <8 x i16> %1 to <16 x i8> + %4 = bitcast <8 x i16> %2 to <16 x i8> + %5 = shufflevector <16 x i8> %3, <16 x i8> %4, <16 x i32> + ret <16 x i8> %5 +} + define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) { ; Nothing interesting to test here. Just make sure we didn't crashe. 
; ALL-LABEL: stress_test2: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-256-v32.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-256-v32.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-256-v32.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-256-v32.ll 2017-10-17 14:41:15.000000000 +0000 @@ -2262,6 +2262,35 @@ ret <32 x i8> %shuffle } +define <32 x i8> @shuffe_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62(<16 x i16> %a0, <16 x i16> %a1) { +; AVX1-LABEL: shuffe_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2OR512VL-LABEL: shuffe_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: +; AVX2OR512VL: # BB#0: +; AVX2OR512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX2OR512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX2OR512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2OR512VL-NEXT: retq + %1 = lshr <16 x i16> %a0, + %2 = lshr <16 x i16> %a1, + %3 = bitcast <16 x i16> %1 to <32 x i8> + %4 = bitcast <16 x i16> %2 to <32 x i8> + %5 = shufflevector <32 x i8> %3, <32 x i8> %4, <32 x i32> + ret <32 x i8> %5 +} + define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-LABEL: PR28136: ; AVX1: # BB#0: diff -Nru 
llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-256-v4.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-256-v4.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-256-v4.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-256-v4.ll 2017-10-17 14:41:15.000000000 +0000 @@ -275,6 +275,27 @@ ret <4 x double> %shuffle } +define <4 x double> @shuffle_v4f64_0213(<4 x double> %a, <4 x double> %b) { +; AVX1-LABEL: shuffle_v4f64_0213: +; AVX1: # BB#0: +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] +; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2] +; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4f64_0213: +; AVX2: # BB#0: +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4f64_0213: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512VL-NEXT: retq + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %shuffle +} + define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) { ; ALL-LABEL: shuffle_v4f64_0423: ; ALL: # BB#0: @@ -756,6 +777,27 @@ ret <4 x i64> %shuffle } +define <4 x i64> @shuffle_v4i64_0213(<4 x i64> %a, <4 x i64> %b) { +; AVX1-LABEL: shuffle_v4i64_0213: +; AVX1: # BB#0: +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] +; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2] +; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4i64_0213: +; AVX2: # BB#0: +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4i64_0213: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512VL-NEXT: retq + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %shuffle +} + define <4 
x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_0124: ; AVX1: # BB#0: diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-512-v64.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-512-v64.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-512-v64.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-512-v64.ll 2017-10-17 14:41:15.000000000 +0000 @@ -538,3 +538,54 @@ %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> ret <64 x i8> %shuffle } + +define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) { +; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm1 +; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 +; AVX512F-NEXT: vpackuswb %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 +; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512BW-NEXT: vpackuswb 
%ymm2, %ymm0, %ymm0 +; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 +; AVX512BW-NEXT: vpackuswb %ymm2, %ymm1, %ymm1 +; AVX512BW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm1 +; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm2, %ymm1 +; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512DQ-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] +; AVX512DQ-NEXT: retq +; +; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: +; AVX512VBMI: # BB#0: +; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127] +; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 +; AVX512VBMI-NEXT: retq + %1 = lshr <32 x i16> %a0, + %2 = lshr <32 x i16> %a1, + %3 = bitcast <32 x i16> %1 to <64 x i8> + %4 = bitcast <32 x i16> %2 to <64 x i8> + %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> + ret <64 x i8> %5 +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-masked.ll llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-masked.ll --- 
llvm-toolchain-snapshot-6.0~svn315865/test/CodeGen/X86/vector-shuffle-masked.ll 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/CodeGen/X86/vector-shuffle-masked.ll 2017-10-17 14:41:15.000000000 +0000 @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=CHECK define <4 x i32> @mask_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v4i32_1234: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -18,7 +18,7 @@ define <4 x i32> @maskz_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v4i32_1234: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3],xmm1[0] ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -31,7 +31,7 @@ define <4 x i32> @mask_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v4i32_2345: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -45,7 +45,7 @@ define <4 x i32> @maskz_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v4i32_2345: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3],xmm1[0,1] ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 
x i32> @@ -58,7 +58,7 @@ define <2 x i64> @mask_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v2i64_12: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -72,7 +72,7 @@ define <2 x i64> @maskz_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v2i64_12: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] ; CHECK-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> @@ -85,7 +85,7 @@ define <4 x i64> @mask_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v4i64_1234: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} ymm2 {%k1} = ymm0[1,2,3],ymm1[0] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -99,7 +99,7 @@ define <4 x i64> @maskz_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v4i64_1234: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3],ymm1[0] ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> @@ -112,7 +112,7 @@ define <4 x i64> @mask_shuffle_v4i64_1230(<4 x i64> %a, <4 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v4i64_1230: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,0] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -126,7 +126,7 @@ define <4 x i64> @maskz_shuffle_v4i64_1230(<4 x i64> %a, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v4i64_1230: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; 
CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,0] ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> @@ -139,7 +139,7 @@ define <8 x i32> @mask_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v8i32_12345678: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[1,2,3,4,5,6,7],ymm1[0] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -152,7 +152,7 @@ define <8 x i32> @maskz_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v8i32_12345678: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7],ymm1[0] ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -164,7 +164,7 @@ define <8 x i32> @mask_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v8i32_23456789: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1] ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -177,7 +177,7 @@ define <8 x i32> @maskz_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v8i32_23456789: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,4,5,6,7],ymm1[0,1] ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -189,7 +189,7 @@ define <8 x i32> @mask_shuffle_v8i32_12345670(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_shuffle_v8i32_12345670: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,4,5,6,7,0] ; CHECK-NEXT: vmovdqa 
%ymm1, %ymm0 ; CHECK-NEXT: retq @@ -202,7 +202,7 @@ define <8 x i32> @maskz_shuffle_v8i32_12345670(<8 x i32> %a, i8 %mask) { ; CHECK-LABEL: maskz_shuffle_v8i32_12345670: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7,0] ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> @@ -215,7 +215,7 @@ ; CHECK-LABEL: mask_shuffle_v8i32_23456701: ; CHECK: # BB#0: ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0] -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> @@ -228,7 +228,7 @@ ; CHECK-LABEL: maskz_shuffle_v8i32_23456701: ; CHECK: # BB#0: ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0] -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> @@ -240,7 +240,7 @@ define <4 x i32> @mask_extract_v8i32_v4i32_0(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i32_v4i32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -254,7 +254,7 @@ define <4 x i32> @mask_extract_v8i32_v4i32_0_z(<8 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i32_v4i32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -268,7 +268,7 @@ define <4 x i32> @mask_extract_v8i32_v4i32_1(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i32_v4i32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x4 $1, %ymm0, %xmm1 {%k1} ; CHECK-NEXT: 
vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -283,7 +283,7 @@ define <4 x i32> @mask_extract_v8i32_v4i32_1_z(<8 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i32_v4i32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -297,7 +297,7 @@ define <4 x float> @mask_extract_v8f32_v4f32_0(<8 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f32_v4f32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -311,7 +311,7 @@ define <4 x float> @mask_extract_v8f32_v4f32_0_z(<8 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f32_v4f32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -325,7 +325,7 @@ define <4 x float> @mask_extract_v8f32_v4f32_1(<8 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f32_v4f32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -340,7 +340,7 @@ define <4 x float> @mask_extract_v8f32_v4f32_1_z(<8 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f32_v4f32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -354,7 +354,7 @@ define <2 x i64> @mask_extract_v4i64_v2i64_0(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v4i64_v2i64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: 
retq @@ -368,7 +368,7 @@ define <2 x i64> @mask_extract_v4i64_v2i64_0_z(<4 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v4i64_v2i64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -382,7 +382,7 @@ define <2 x i64> @mask_extract_v4i64_v2i64_1(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v4i64_v2i64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x2 $1, %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -397,7 +397,7 @@ define <2 x i64> @mask_extract_v4i64_v2i64_1_z(<4 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v4i64_v2i64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -411,7 +411,7 @@ define <2 x double> @mask_extract_v4f64_v2f64_0(<4 x double> %a, <2 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v4f64_v2f64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -425,7 +425,7 @@ define <2 x double> @mask_extract_v4f64_v2f64_0_z(<4 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v4f64_v2f64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -439,7 +439,7 @@ define <2 x double> @mask_extract_v4f64_v2f64_1(<4 x double> %a, <2 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v4f64_v2f64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -454,7 +454,7 @@ 
define <2 x double> @mask_extract_v4f64_v2f64_1_z(<4 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v4f64_v2f64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -468,7 +468,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_0(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -482,7 +482,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_0_z(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -496,7 +496,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_1(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -511,7 +511,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_1_z(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -525,7 +525,7 @@ define <4 x i32> @mask_extract_v16i32_v4i32_2(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_2: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -540,7 +540,7 @@ define <4 x i32> 
@mask_extract_v16i32_v4i32_3(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v4i32_3: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x4 $3, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -555,7 +555,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_0(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -569,7 +569,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_0_z(<16 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -583,7 +583,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_1(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -598,7 +598,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_1_z(<16 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -612,7 +612,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_2(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_2: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ 
-627,7 +627,7 @@ define <4 x float> @mask_extract_v16f32_v4f32_3(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v4f32_3: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -642,7 +642,7 @@ define <8 x i32> @mask_extract_v16i32_v8i32_0(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v8i32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> @@ -654,7 +654,7 @@ define <8 x i32> @mask_extract_v16i32_v8i32_0_z(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v8i32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> @@ -666,7 +666,7 @@ define <8 x i32> @mask_extract_v16i32_v8i32_1(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v8i32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -679,7 +679,7 @@ define <8 x i32> @mask_extract_v16i32_v8i32_1_z(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v16i32_v8i32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> @@ -691,7 +691,7 @@ define <8 x float> @mask_extract_v16f32_v8f32_0(<16 x float> %a, <8 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v8f32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; 
CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> @@ -703,7 +703,7 @@ define <8 x float> @mask_extract_v16f32_v8f32_0_z(<16 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v8f32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> @@ -715,7 +715,7 @@ define <8 x float> @mask_extract_v16f32_v8f32_1(<16 x float> %a, <8 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v8f32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -728,7 +728,7 @@ define <8 x float> @mask_extract_v16f32_v8f32_1_z(<16 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v16f32_v8f32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> @@ -740,7 +740,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_0(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -754,7 +754,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_0_z(<8 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -768,7 +768,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_1(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; 
CHECK-LABEL: mask_extract_v8i64_v2i64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -783,7 +783,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_1_z(<8 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -797,7 +797,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_2(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_2: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x2 $2, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -812,7 +812,7 @@ define <2 x i64> @mask_extract_v8i64_v2i64_3(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v2i64_3: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x2 $3, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -827,7 +827,7 @@ define <2 x double> @mask_extract_v8f64_v2f64_0(<8 x double> %a, <2 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v2f64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -841,7 +841,7 @@ define <2 x double> @mask_extract_v8f64_v2f64_0_z(<8 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v2f64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -855,7 +855,7 @@ define <2 x double> @mask_extract_v8f64_v2f64_1(<8 x double> %a, <2 x double> %passthru, i8 
%mask) { ; CHECK-LABEL: mask_extract_v8f64_v2f64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -870,7 +870,7 @@ define <2 x double> @mask_extract_v8f64_v2f64_1_z(<8 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v2f64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -884,7 +884,7 @@ define <2 x double> @mask_extract_v8f64_v2f64_2(<8 x double> %a, <2 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v2f64_2: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x2 $2, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -899,7 +899,7 @@ define <2 x double> @mask_extract_v8f64_v2f64_3(<8 x double> %a, <2 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v2f64_3: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x2 $3, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -914,7 +914,7 @@ define <4 x i64> @mask_extract_v8i64_v4i64_0(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v4i64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> @@ -927,7 +927,7 @@ define <4 x i64> @mask_extract_v8i64_v4i64_0_z(<8 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v4i64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> @@ -940,7 
+940,7 @@ define <4 x i64> @mask_extract_v8i64_v4i64_1(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v4i64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -954,7 +954,7 @@ define <4 x i64> @mask_extract_v8i64_v4i64_1_z(<8 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8i64_v4i64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> @@ -967,7 +967,7 @@ define <4 x double> @mask_extract_v8f64_v4f64_0(<8 x double> %a, <4 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v4f64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> @@ -980,7 +980,7 @@ define <4 x double> @mask_extract_v8f64_v4f64_0_z(<8 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v4f64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> @@ -993,7 +993,7 @@ define <4 x double> @mask_extract_v8f64_v4f64_1(<8 x double> %a, <4 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v4f64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1007,7 +1007,7 @@ define <4 x double> @mask_extract_v8f64_v4f64_1_z(<8 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_extract_v8f64_v4f64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, 
%k1 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> @@ -1020,7 +1020,7 @@ define <8 x i32> @mask_cast_extract_v8i64_v8i32_0(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> @@ -1033,7 +1033,7 @@ define <8 x i32> @mask_cast_extract_v8i64_v8i32_0_z(<8 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> @@ -1046,7 +1046,7 @@ define <8 x i32> @mask_cast_extract_v8i64_v8i32_1(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1060,7 +1060,7 @@ define <8 x i32> @mask_cast_extract_v8i64_v8i32_1_z(<8 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> @@ -1073,7 +1073,7 @@ define <8 x float> @mask_cast_extract_v8f64_v8f32_0(<8 x double> %a, <8 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 
x i32> @@ -1086,7 +1086,7 @@ define <8 x float> @mask_cast_extract_v8f64_v8f32_0_z(<8 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> @@ -1099,7 +1099,7 @@ define <8 x float> @mask_cast_extract_v8f64_v8f32_1(<8 x double> %a, <8 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1113,7 +1113,7 @@ define <8 x float> @mask_cast_extract_v8f64_v8f32_1_z(<8 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> @@ -1126,7 +1126,7 @@ define <4 x i32> @mask_cast_extract_v8i64_v4i32_0(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1141,7 +1141,7 @@ define <4 x i32> @mask_cast_extract_v8i64_v4i32_0_z(<8 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1156,7 +1156,7 @@ define <4 x i32> @mask_cast_extract_v8i64_v4i32_1(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; 
CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -1172,7 +1172,7 @@ define <4 x i32> @mask_cast_extract_v8i64_v4i32_1_z(<8 x i64> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1187,7 +1187,7 @@ define <4 x float> @mask_cast_extract_v8f64_v4f32_0(<8 x double> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1202,7 +1202,7 @@ define <4 x float> @mask_cast_extract_v8f64_v4f32_0_z(<8 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1217,7 +1217,7 @@ define <4 x float> @mask_cast_extract_v8f64_v4f32_1(<8 x double> %a, <4 x float> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -1233,7 +1233,7 @@ define <4 x float> @mask_cast_extract_v8f64_v4f32_1_z(<8 x double> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1248,7 +1248,7 @@ define <4 x i64> @mask_cast_extract_v16i32_v4i64_0(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: 
mask_cast_extract_v16i32_v4i64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> @@ -1262,7 +1262,7 @@ define <4 x i64> @mask_cast_extract_v16i32_v4i64_0_z(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> @@ -1276,7 +1276,7 @@ define <4 x i64> @mask_cast_extract_v16i32_v4i64_1(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1291,7 +1291,7 @@ define <4 x i64> @mask_cast_extract_v16i32_v4i64_1_z(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> @@ -1305,7 +1305,7 @@ define <4 x double> @mask_cast_extract_v16f32_v4f64_0(<16 x float> %a, <4 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> @@ -1319,7 +1319,7 @@ define <4 x double> @mask_cast_extract_v16f32_v4f64_0_z(<16 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: 
vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> @@ -1333,7 +1333,7 @@ define <4 x double> @mask_cast_extract_v16f32_v4f64_1(<16 x float> %a, <4 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1348,7 +1348,7 @@ define <4 x double> @mask_cast_extract_v16f32_v4f64_1_z(<16 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> @@ -1362,7 +1362,7 @@ define <2 x i64> @mask_cast_extract_v16i32_v2i64_0(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1377,7 +1377,7 @@ define <2 x i64> @mask_cast_extract_v16i32_v2i64_0_z(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1392,7 +1392,7 @@ define <2 x i64> @mask_cast_extract_v16i32_v2i64_1(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -1408,7 +1408,7 @@ define <2 x i64> @mask_cast_extract_v16i32_v2i64_1_z(<16 x i32> %a, i8 %mask) { ; 
CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1423,7 +1423,7 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_0(<16 x float> %a, <2 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1438,7 +1438,7 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_0_z(<16 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1453,7 +1453,7 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x double> %passthru, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper @@ -1469,7 +1469,7 @@ define <2 x double> @mask_cast_extract_v16f32_v2f64_1_z(<16 x float> %a, i8 %mask) { ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1484,7 +1484,7 @@ define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) { ; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] ; CHECK-NEXT: retq %q = load double, double* %x, align 1 @@ -1499,7 +1499,7 
@@ define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) { ; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0] ; CHECK-NEXT: retq %q = load double, double* %x, align 1 @@ -1514,7 +1514,7 @@ define <8 x float> @test_broadcast_2f64_8f32(<2 x double> *%p, i8 %mask) nounwind { ; CHECK-LABEL: test_broadcast_2f64_8f32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %1 = load <2 x double>, <2 x double> *%p @@ -1528,7 +1528,7 @@ define <8 x i32> @test_broadcast_2i64_8i32(<2 x i64> *%p, i8 %mask) nounwind { ; CHECK-LABEL: test_broadcast_2i64_8i32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %1 = load <2 x i64>, <2 x i64> *%p @@ -1542,7 +1542,7 @@ define <16 x float> @test_broadcast_2f64_16f32(<2 x double> *%p, i16 %mask) nounwind { ; CHECK-LABEL: test_broadcast_2f64_16f32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %1 = load <2 x double>, <2 x double> *%p @@ -1556,7 +1556,7 @@ define <16 x i32> @test_broadcast_2i64_16i32(<2 x i64> *%p, i16 %mask) nounwind { ; CHECK-LABEL: test_broadcast_2i64_16i32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %1 = load <2 x i64>, <2 x i64> *%p @@ -1570,7 +1570,7 @@ define <16 x float> @test_broadcast_4f64_16f32(<4 x double> *%p, i16 %mask) nounwind { ; CHECK-LABEL: test_broadcast_4f64_16f32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, 
%k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %1 = load <4 x double>, <4 x double> *%p @@ -1584,7 +1584,7 @@ define <16 x i32> @test_broadcast_4i64_16i32(<4 x i64> *%p, i16 %mask) nounwind { ; CHECK-LABEL: test_broadcast_4i64_16i32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %1 = load <4 x i64>, <4 x i64> *%p @@ -1598,7 +1598,7 @@ define <4 x double> @test_broadcast_4f32_4f64(<4 x float> *%p, i8 %mask) nounwind { ; CHECK-LABEL: test_broadcast_4f32_4f64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %1 = load <4 x float>, <4 x float> *%p @@ -1613,7 +1613,7 @@ define <4 x i64> @test_broadcast_4i32_4i64(<4 x i32> *%p, i8 %mask) nounwind { ; CHECK-LABEL: test_broadcast_4i32_4i64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %1 = load <4 x i32>, <4 x i32> *%p @@ -1628,7 +1628,7 @@ define <8 x double> @test_broadcast_4f32_8f64(<4 x float> *%p, i8 %mask) nounwind { ; CHECK-LABEL: test_broadcast_4f32_8f64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %1 = load <4 x float>, <4 x float> *%p @@ -1642,7 +1642,7 @@ define <8 x i64> @test_broadcast_4i32_8i64(<4 x i32> *%p, i8 %mask) nounwind { ; CHECK-LABEL: test_broadcast_4i32_8i64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %1 = load <4 x i32>, <4 x i32> *%p @@ -1656,7 +1656,7 @@ define 
<8 x double> @test_broadcast_8f32_8f64(<8 x float> *%p, i8 %mask) nounwind { ; CHECK-LABEL: test_broadcast_8f32_8f64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %1 = load <8 x float>, <8 x float> *%p @@ -1670,7 +1670,7 @@ define <8 x i64> @test_broadcast_8i32_8i64(<8 x i32> *%p, i8 %mask) nounwind { ; CHECK-LABEL: test_broadcast_8i32_8i64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %1 = load <8 x i32>, <8 x i32> *%p @@ -1685,7 +1685,7 @@ ; CHECK-LABEL: test_broadcastf32x2_v4f32: ; CHECK: # BB#0: ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> @@ -1699,7 +1699,7 @@ ; CHECK-LABEL: test_broadcastf32x2_v4f32_z: ; CHECK: # BB#0: ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> @@ -1712,7 +1712,7 @@ define <4 x i32> @test_broadcasti32x2_v4i32(<4 x i32> %vec, <4 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: test_broadcasti32x2_v4i32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -1726,7 +1726,7 @@ define <4 x i32> @test_broadcasti32x2_v4i32_z(<4 x i32> %vec, i8 %mask) { ; CHECK-LABEL: test_broadcasti32x2_v4i32_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1] ; CHECK-NEXT: 
retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> @@ -1739,7 +1739,7 @@ define <8 x float> @test_broadcastf32x2_v8f32(<8 x float> %vec, <8 x float> %passthru, i8 %mask) { ; CHECK-LABEL: test_broadcastf32x2_v8f32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1752,7 +1752,7 @@ define <8 x float> @test_broadcastf32x2_v8f32_z(<8 x float> %vec, i8 %mask) { ; CHECK-LABEL: test_broadcastf32x2_v8f32_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> @@ -1764,7 +1764,7 @@ define <8 x i32> @test_broadcasti32x2_v8i32(<8 x i32> %vec, <8 x i32> %passthru, i8 %mask) { ; CHECK-LABEL: test_broadcasti32x2_v8i32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1777,7 +1777,7 @@ define <8 x i32> @test_broadcasti32x2_v8i32_z(<8 x i32> %vec, i8 %mask) { ; CHECK-LABEL: test_broadcasti32x2_v8i32_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> @@ -1789,7 +1789,7 @@ define <16 x float> @test_broadcastf32x2_v16f32_z(<16 x float> %vec, i16 %mask) { ; CHECK-LABEL: test_broadcastf32x2_v16f32_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> @@ -1801,7 
+1801,7 @@ define <16 x i32> @test_broadcasti32x2_v16i32(<16 x i32> %vec, <16 x i32> %passthru, i16 %mask) { ; CHECK-LABEL: test_broadcasti32x2_v16i32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1814,7 +1814,7 @@ define <16 x float> @test_broadcastf32x2_v16f32(<16 x float> %vec, <16 x float> %passthru, i16 %mask) { ; CHECK-LABEL: test_broadcastf32x2_v16f32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1827,7 +1827,7 @@ define <16 x i32> @test_broadcasti32x2_v16i32_z(<16 x i32> %vec, i16 %mask) { ; CHECK-LABEL: test_broadcasti32x2_v16i32_z: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> @@ -1835,3 +1835,78 @@ %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res } + +define <16 x i8> @mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) { +; CHECK-LABEL: mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16: +; CHECK: # BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 +; CHECK-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %mask.cast = bitcast i16 %mask to <16 x i1> + %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru + ret <16 x i8> %res +} + +define <16 x i8> 
@maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, i16 %mask) { +; CHECK-LABEL: maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16: +; CHECK: # BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; CHECK-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %mask.cast = bitcast i16 %mask to <16 x i1> + %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer + ret <16 x i8> %res +} + +define <16 x i8> @mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) { +; CHECK-LABEL: mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19: +; CHECK: # BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 +; CHECK-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %mask.cast = bitcast i16 %mask to <16 x i1> + %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru + ret <16 x i8> %res +} + +define <16 x i8> @maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, i16 %mask) { +; CHECK-LABEL: maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19: +; CHECK: # BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] +; CHECK-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %mask.cast = bitcast i16 %mask to <16 x i1> + %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer + ret <16 x i8> %res +} + +define <16 x i8> @mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) { +; CHECK-LABEL: 
mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23: +; CHECK: # BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 +; CHECK-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %mask.cast = bitcast i16 %mask to <16 x i1> + %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru + ret <16 x i8> %res +} + +define <16 x i8> @maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, i16 %mask) { +; CHECK-LABEL: maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23: +; CHECK: # BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; CHECK-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %mask.cast = bitcast i16 %mask to <16 x i1> + %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer + ret <16 x i8> %res +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s llvm-toolchain-snapshot-6.0~svn316003/test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s --- llvm-toolchain-snapshot-6.0~svn315865/test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s 2017-10-15 17:43:16.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s 2017-10-17 14:41:15.000000000 +0000 @@ -49,11 +49,6 @@ .long "_ExitProcess@4" // IMAGE_REL_I386_DIR32 # rtdyld-check: *{4}__imp__ExitProcess = 0xffffffff - .global string - .align 1 -string: - .asciz "Hello World!\n" - .global relocations relocations: rel5: @@ -63,8 +58,8 @@ # rtdyld-check: *{2}rel6 = 1 .secidx __imp__OutputDebugStringA // IMAGE_REL_I386_SECTION rel7: -# rtdyld-check: *{4}rel7 = relocations - section_addr(COFF_i386.s.tmp.obj, .data) - .secrel32 relocations // IMAGE_REL_I386_SECREL +# 
rtdyld-check: *{4}rel7 = string - section_addr(COFF_i386.s.tmp.obj, .data) + .secrel32 string // IMAGE_REL_I386_SECREL # Test that addends work. rel8: @@ -79,3 +74,12 @@ rel11: # rtdyld-check: *{4}rel11 = string - section_addr(COFF_i386.s.tmp.obj, .data) + 1 .long string@SECREL32+1 // IMAGE_REL_I386_SECREL + +# We explicitly add padding to put string outside of the 16bit address space +# (absolute and as an offset from .data), so that relocations involving +# 32bit addresses / offsets are not accidentally truncated to 16 bits. + .space 65536 + .global string + .align 1 +string: + .asciz "Hello World!\n" diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/Mips/micromips32r3/valid-el.txt llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/Mips/micromips32r3/valid-el.txt --- llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/Mips/micromips32r3/valid-el.txt 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/Mips/micromips32r3/valid-el.txt 2017-10-17 14:41:15.000000000 +0000 @@ -55,6 +55,8 @@ 0x26 0x11 0x67 0x45 # CHECK: addi $9, $6, 17767 0x26 0x31 0x67 0xc5 # CHECK: addiu $9, $6, -15001 0xe6 0x00 0x50 0x49 # CHECK: addu $9, $6, $7 +0x80 0x43 0xe6 0xff # CHECK: bc1f -48 +0xa0 0x43 0xe2 0xff # CHECK: bc1t -56 0xe6 0x00 0x90 0x49 # CHECK: sub $9, $6, $7 0xa3 0x00 0xd0 0x21 # CHECK: subu $4, $3, $5 0xe0 0x00 0x90 0x31 # CHECK: sub $6, $zero, $7 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/Mips/micromips32r3/valid.txt llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/Mips/micromips32r3/valid.txt --- llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/Mips/micromips32r3/valid.txt 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/Mips/micromips32r3/valid.txt 2017-10-17 14:41:15.000000000 +0000 @@ -55,6 +55,8 @@ 0x11 0x26 0x45 0x67 # CHECK: addi $9, $6, 17767 0x31 0x26 0xc5 0x67 # CHECK: addiu $9, $6, -15001 
0x00 0xe6 0x49 0x50 # CHECK: addu $9, $6, $7 +0x43 0x80 0xff 0xe6 # CHECK: bc1f -48 +0x43 0xa0 0xff 0xe2 # CHECK: bc1t -56 0x00 0xe6 0x49 0x90 # CHECK: sub $9, $6, $7 0x00 0xa3 0x21 0xd0 # CHECK: subu $4, $3, $5 0x00 0xe0 0x31 0x90 # CHECK: sub $6, $zero, $7 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/prefixes-i386.txt llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/prefixes-i386.txt --- llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/prefixes-i386.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/prefixes-i386.txt 2017-10-17 14:41:15.000000000 +0000 @@ -0,0 +1,87 @@ +# RUN: llvm-mc --disassemble %s -triple=i386-apple-darwin9 | FileCheck %s + +# CHECK: movl %fs:24, %eax +0x64 0xa1 0x18 0x00 0x00 0x00 # mov eax, dword ptr fs:[18h] + +# CHECK: rep +# CHECK-NEXT: insb %dx, %es:(%edi) +0xf3 0x6c #rep ins +# CHECK: rep +# CHECK-NEXT: insl %dx, %es:(%edi) +0xf3 0x6d #rep ins +# CHECK: rep +# CHECK-NEXT: movsb (%esi), %es:(%edi) +0xf3 0xa4 #rep movs +# CHECK: rep +# CHECK-NEXT: movsl (%esi), %es:(%edi) +0xf3 0xa5 #rep movs +# CHECK: rep +# CHECK-NEXT: outsb (%esi), %dx +0xf3 0x6e #rep outs +# CHECK: rep +# CHECK-NEXT: outsl (%esi), %dx +0xf3 0x6f #rep outs +# CHECK: rep +# CHECK-NEXT: lodsb (%esi), %al +0xf3 0xac #rep lods +# CHECK: rep +# CHECK-NEXT: lodsl (%esi), %eax +0xf3 0xad #rep lods +# CHECK: rep +# CHECK-NEXT: stosb %al, %es:(%edi) +0xf3 0xaa #rep stos +# CHECK: rep +# CHECK-NEXT: stosl %eax, %es:(%edi) +0xf3 0xab #rep stos +# CHECK: rep +# CHECK-NEXT: cmpsb %es:(%edi), (%esi) +0xf3 0xa6 #rep cmps +# CHECK: rep +# CHECK-NEXT: cmpsl %es:(%edi), (%esi) +0xf3 0xa7 #repe cmps +# CHECK: rep +# CHECK-NEXT: scasb %es:(%edi), %al +0xf3 0xae #repe scas +# CHECK: rep +# CHECK-NEXT: scasl %es:(%edi), %eax +0xf3 0xaf #repe scas +# CHECK: repne +# CHECK-NEXT: cmpsb %es:(%edi), (%esi) +0xf2 0xa6 #repne cmps +# CHECK: repne +# CHECK-NEXT: cmpsl %es:(%edi), (%esi) 
+0xf2 0xa7 #repne cmps +# CHECK: repne +# CHECK-NEXT: scasb %es:(%edi), %al +0xf2 0xae #repne scas +# CHECK: repne +# CHECK-NEXT: scasl %es:(%edi), %eax +0xf2 0xaf #repne scas + +# CHECK: repne +# CHECK-NEXT: scasw %es:(%edi), %ax +0xf2 0x66 0xaf +# CHECK: repne +# CHECK-NEXT: scasw %es:(%edi), %ax +0x66 0xf2 0xaf + +# CHECK: rep +# CHECK-NEXT: scasw %es:(%edi), %ax +0xf3 0x66 0xaf +# CHECK: rep +# CHECK-NEXT: scasw %es:(%edi), %ax +0x66 0xf3 0xaf + +# CHECK: repne +# CHECK: insw %dx, %es:(%edi) +0xf2 0x66 0x6d +# CHECK: repne +# CHECK: insw %dx, %es:(%edi) +0x66 0xf2 0x6d + +# CHECK: rep +# CHECK: insw %dx, %es:(%edi) +0xf3 0x66 0x6d +# CHECK: rep +# CHECK: insw %dx, %es:(%edi) +0x66 0xf3 0x6d diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/prefixes.txt llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/prefixes.txt --- llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/prefixes.txt 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/prefixes.txt 2017-10-17 14:41:15.000000000 +0000 @@ -1,5 +1,60 @@ # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s +# CHECK: rep +# CHECK-NEXT: insb %dx, %es:(%rdi) +0xf3 0x6c #rep ins +# CHECK: rep +# CHECK-NEXT: insl %dx, %es:(%rdi) +0xf3 0x6d #rep ins +# CHECK: rep +# CHECK-NEXT: movsb (%rsi), %es:(%rdi) +0xf3 0xa4 #rep movs +# CHECK: rep +# CHECK-NEXT: movsl (%rsi), %es:(%rdi) +0xf3 0xa5 #rep movs +# CHECK: rep +# CHECK-NEXT: outsb (%rsi), %dx +0xf3 0x6e #rep outs +# CHECK: rep +# CHECK-NEXT: outsl (%rsi), %dx +0xf3 0x6f #rep outs +# CHECK: rep +# CHECK-NEXT: lodsb (%rsi), %al +0xf3 0xac #rep lods +# CHECK: rep +# CHECK-NEXT: lodsl (%rsi), %eax +0xf3 0xad #rep lods +# CHECK: rep +# CHECK-NEXT: stosb %al, %es:(%rdi) +0xf3 0xaa #rep stos +# CHECK: rep +# CHECK-NEXT: stosl %eax, %es:(%rdi) +0xf3 0xab #rep stos +# CHECK: rep +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf3 0xa6 #rep cmps +# CHECK: rep +# CHECK-NEXT: cmpsl 
%es:(%rdi), (%rsi) +0xf3 0xa7 #repe cmps +# CHECK: rep +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf3 0xae #repe scas +# CHECK: rep +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf3 0xaf #repe scas +# CHECK: repne +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf2 0xa6 #repne cmps +# CHECK: repne +# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi) +0xf2 0xa7 #repne cmps +# CHECK: repne +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf2 0xae #repne scas +# CHECK: repne +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf2 0xaf #repne scas + # CHECK: lock # CHECK-NEXT: orl $16, %fs:776 0xf0 0x64 0x83 0x0c 0x25 0x08 0x03 0x00 0x00 0x10 @@ -50,7 +105,6 @@ # Test that multiple redundant prefixes work (redundant, but valid x86). # CHECK: rep -# CHECK-NEXT: rep # CHECK-NEXT: stosq 0xf3 0xf3 0x48 0xab diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/prefixes-x86_64.txt llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/prefixes-x86_64.txt --- llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/prefixes-x86_64.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/prefixes-x86_64.txt 2017-10-17 14:41:15.000000000 +0000 @@ -0,0 +1,38 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s + +# CHECK: mulsd %xmm7, %xmm7 +0x66 0xF3 0xF2 0x0F 0x59 0xFF +# CHECK: mulss %xmm7, %xmm7 +0x66 0xF2 0xF3 0x0F 0x59 0xFF +# CHECK: mulpd %xmm7, %xmm7 +0x66 0x0F 0x59 0xFF +# CHECK: mulsd %xmm7, %xmm7 +0xf2 0x66 0x0f 0x59 0xff + +# CHECK: repne +# CHECK-NEXT: scasw %es:(%rdi), %ax +0xf2 0x66 0xaf +# CHECK: rep +# CHECK-NEXT: scasw %es:(%rdi), %ax +0x66 0xf2 0xaf + +# CHECK: rep +# CHECK-NEXT: scasw %es:(%rdi), %ax +0xf3 0x66 0xaf +# CHECK: rep +# CHECK-NEXT: scasw %es:(%rdi), %ax +0x66 0xf3 0xaf + +# CHECK: repne +# CHECK: insw %dx, %es:(%rdi) +0xf2 0x66 0x6d +# CHECK: repne +# CHECK: insw %dx, %es:(%rdi) +0x66 0xf2 0x6d + +# CHECK: rep +# CHECK: insw %dx, %es:(%rdi) +0xf3 0x66 0x6d +# CHECK: rep +# CHECK: insw %dx, %es:(%rdi) 
+0x66 0xf3 0x6d diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/x86-32.txt llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/x86-32.txt --- llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/x86-32.txt 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/x86-32.txt 2017-10-17 14:41:15.000000000 +0000 @@ -797,3 +797,14 @@ # CHECK: nopw %ax 0x66 0x0f 0x1f 0xc0 + +# CHECK: movw %bx, %cs:(%esi,%ebp) +0x2e 0x66 0x89 0x1c 0x2e +# CHECK: movl %ebx, %cs:(%si) +0x2e 0x67 0x89 0x1c +# CHECK: movl %ebx, %cs:(%esi,%ebp) +0x2e 0x89 0x1c 0x2e +# CHECK: movw %bx, %cs:(%si) +0x2e 0x67 0x66 0x89 0x1c +# CHECK: movw %bx, %cs:(%si) +0x2e 0x66 0x67 0x89 0x1c diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/x86-64.txt llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/x86-64.txt --- llvm-toolchain-snapshot-6.0~svn315865/test/MC/Disassembler/X86/x86-64.txt 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/MC/Disassembler/X86/x86-64.txt 2017-10-17 14:41:15.000000000 +0000 @@ -486,3 +486,18 @@ # CHECK: nopq %rax 0x48 0x0f 0x1f 0xC0 + +# CHECK: xchgw %di, %ax +0x66 0x3e 0x97 + +# CHECK: movw %bx, %cs:(%rsi,%rbp) +0x2e 0x66 0x89 0x1c 0x2e +# CHECK: movl %ebx, %cs:(%esi,%ebp) +0x2e 0x67 0x89 0x1c 0x2e +# CHECK: movl %ebx, %cs:(%rsi,%rbp) +0x2e 0x89 0x1c 0x2e +# CHECK: movw %bx, %cs:(%esi,%ebp) +0x2e 0x67 0x66 0x89 0x1c 0x2e +# CHECK: movw %bx, %cs:(%esi,%ebp) +0x2e 0x66 0x67 0x89 0x1c 0x2e + diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/MC/Mips/micromips/valid.s llvm-toolchain-snapshot-6.0~svn316003/test/MC/Mips/micromips/valid.s --- llvm-toolchain-snapshot-6.0~svn315865/test/MC/Mips/micromips/valid.s 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/MC/Mips/micromips/valid.s 2017-10-17 14:41:15.000000000 +0000 @@ -1,4 +1,4 @@ -# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding 
-mattr=micromips | FileCheck %s +# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -show-inst -mattr=micromips | FileCheck %s .set noat addiusp -16 # CHECK: addiusp -16 # encoding: [0x4f,0xf9] @@ -243,9 +243,13 @@ c.un.d $fcc6, $f22, $f24 # CHECK: c.un.d $fcc6, $f22, $f24 # encoding: [0x57,0x16,0xc4,0x7c] c.un.s $fcc1, $f30, $f4 # CHECK: c.un.s $fcc1, $f30, $f4 # encoding: [0x54,0x9e,0x20,0x7c] bc1t 8 # CHECK: bc1t 8 # encoding: [0x43,0xa0,0x00,0x04] + # CHECK-NEXT: # MatcherInfo; +// CHECK-NEXT: static MyTargetInstructionSelector::ComplexMatcherMemFn ComplexPredicateFns[]; // CHECK-NEXT: #endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL // CHECK-LABEL: #ifdef GET_GLOBALISEL_TEMPORARIES_INIT // CHECK-NEXT: , State(2), -// CHECK-NEXT: MatcherInfo({TypeObjects, FeatureBitsets, I64ImmPredicateFns, APIntImmPredicateFns, APFloatImmPredicateFns, { -// CHECK-NEXT: nullptr, // GICP_Invalid -// CHECK-NEXT: &MyTargetInstructionSelector::selectComplexPattern, // gi_complex -// CHECK-NEXT: &MyTargetInstructionSelector::selectComplexPatternRR, // gi_complex_rr -// CHECK-NEXT: }}) +// CHECK-NEXT: MatcherInfo({TypeObjects, FeatureBitsets, I64ImmPredicateFns, APIntImmPredicateFns, APFloatImmPredicateFns, ComplexPredicateFns}) // CHECK-NEXT: #endif // ifdef GET_GLOBALISEL_TEMPORARIES_INIT // CHECK-LABEL: enum SubtargetFeatureBits : uint8_t { @@ -147,6 +144,13 @@ // CHECK-NEXT: Predicate_simm9, // CHECK-NEXT: }; +// CHECK-LABEL: MyTargetInstructionSelector::ComplexMatcherMemFn +// CHECK-NEXT: MyTargetInstructionSelector::ComplexPredicateFns[] = { +// CHECK-NEXT: nullptr, // GICP_Invalid +// CHECK-NEXT: &MyTargetInstructionSelector::selectComplexPattern, // gi_complex +// CHECK-NEXT: &MyTargetInstructionSelector::selectComplexPatternRR, // gi_complex_rr +// CHECK-NEXT: } + // CHECK: bool MyTargetInstructionSelector::selectImpl(MachineInstr &I) const { // CHECK-NEXT: MachineFunction &MF = *I.getParent()->getParent(); // CHECK-NEXT: MachineRegisterInfo &MRI = MF.getRegInfo(); 
@@ -814,9 +818,29 @@ def fpimmz : FPImmLeafisExactlyValue(0.0); }]>; def MOVfpimmz : I<(outs FPR32:$dst), (ins f32imm:$imm), [(set FPR32:$dst, fpimmz:$imm)]>; -//===- Test a pattern with an MBB operand. --------------------------------===// +//===- Test a simple pattern with inferred pointer operands. ---------------===// // CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 22*/ [[LABEL:[0-9]+]], +// CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_LOAD, +// CHECK-NEXT: GIM_CheckNonAtomic, /*MI*/0, +// CHECK-NEXT: // MIs[0] dst +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/MyTarget::GPR32RegClassID, +// CHECK-NEXT: // MIs[0] src1 +// CHECK-NEXT: GIM_CheckPointerToAny, /*MI*/0, /*Op*/1, /*SizeInBits*/32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/MyTarget::GPR32RegClassID, +// CHECK-NEXT: // (ld:{ *:[i32] } GPR32:{ *:[i32] }:$src1)<><> => (LOAD:{ *:[i32] } GPR32:{ *:[i32] }:$src1) +// CHECK-NEXT: GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/MyTarget::LOAD, +// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/0, +// CHECK-NEXT: GIR_Done, +// CHECK-NEXT: // Label 22: @[[LABEL]] + +def LOAD : I<(outs GPR32:$dst), (ins GPR32:$src1), + [(set GPR32:$dst, (load GPR32:$src1))]>; +//===- Test a pattern with an MBB operand. 
--------------------------------===// + +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 23*/ [[LABEL:[0-9]+]], // CHECK-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/1, // CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_BR, // CHECK-NEXT: // MIs[0] target @@ -825,7 +849,7 @@ // CHECK-NEXT: GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/MyTarget::BR, // CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 22: @[[LABEL]] +// CHECK-NEXT: // Label 23: @[[LABEL]] def BR : I<(outs), (ins unknown:$target), [(br bb:$target)]>; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/ThinLTO/X86/lazyload_metadata.ll llvm-toolchain-snapshot-6.0~svn316003/test/ThinLTO/X86/lazyload_metadata.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/ThinLTO/X86/lazyload_metadata.ll 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/ThinLTO/X86/lazyload_metadata.ll 2017-10-17 14:41:15.000000000 +0000 @@ -10,13 +10,13 @@ ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=LAZY -; LAZY: 51 bitcode-reader - Number of Metadata records loaded +; LAZY: 53 bitcode-reader - Number of Metadata records loaded ; LAZY: 2 bitcode-reader - Number of MDStrings loaded ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY -; NOTLAZY: 60 bitcode-reader - Number of Metadata records loaded +; NOTLAZY: 62 bitcode-reader - Number of Metadata records loaded ; NOTLAZY: 7 bitcode-reader - Number of MDStrings loaded diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/tools/llvm-cov/deferred-region.cpp llvm-toolchain-snapshot-6.0~svn316003/test/tools/llvm-cov/deferred-region.cpp --- llvm-toolchain-snapshot-6.0~svn315865/test/tools/llvm-cov/deferred-region.cpp 2017-10-15 17:43:17.000000000 +0000 +++ 
llvm-toolchain-snapshot-6.0~svn316003/test/tools/llvm-cov/deferred-region.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -68,6 +68,14 @@ return; } +void if_else(bool flag) { + if (flag) { // CHECK: [[@LINE]]|{{ +}}2| + return; // CHECK: [[@LINE]]|{{ +}}1| + } else { // CHECK: [[@LINE]]|{{ +}}1| + return; // CHECK: [[@LINE]]|{{ +}}1| + } // CHECK: [[@LINE]]|{{ +}}1| +} + int main() { foo(0); foo(1); @@ -75,6 +83,8 @@ for_loop(); while_loop(); gotos(); + if_else(true); + if_else(false); return 0; } @@ -107,3 +117,5 @@ // MARKER-NEXT: Highlighted line 67, 1 -> ? // MARKER-NEXT: Highlighted line 68, 1 -> 8 // MARKER-NEXT: Highlighted line 69, 1 -> 2 +// MARKER-NEXT: Marker at 72:7 = 2 +// MARKER-NEXT: Highlighted line 77, 1 -> 2 Binary files /tmp/tmpeIEJLN/p5bPo4EtMy/llvm-toolchain-snapshot-6.0~svn315865/test/tools/llvm-cov/Inputs/deferred-regions.covmapping and /tmp/tmpeIEJLN/OBGBlo_nAS/llvm-toolchain-snapshot-6.0~svn316003/test/tools/llvm-cov/Inputs/deferred-regions.covmapping differ Binary files /tmp/tmpeIEJLN/p5bPo4EtMy/llvm-toolchain-snapshot-6.0~svn315865/test/tools/llvm-cov/Inputs/deferred-regions.profdata and /tmp/tmpeIEJLN/OBGBlo_nAS/llvm-toolchain-snapshot-6.0~svn316003/test/tools/llvm-cov/Inputs/deferred-regions.profdata differ diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/tools/llvm-dwarfdump/X86/debug_frame_GNU_args_size.s llvm-toolchain-snapshot-6.0~svn316003/test/tools/llvm-dwarfdump/X86/debug_frame_GNU_args_size.s --- llvm-toolchain-snapshot-6.0~svn315865/test/tools/llvm-dwarfdump/X86/debug_frame_GNU_args_size.s 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/tools/llvm-dwarfdump/X86/debug_frame_GNU_args_size.s 2017-10-17 14:41:16.000000000 +0000 @@ -0,0 +1,15 @@ +# RUN: llvm-mc %s -filetype=obj -triple=i686-pc-linux -o %t +# RUN: llvm-dwarfdump -v %t | FileCheck %s + +# CHECK: .eh_frame contents: +# CHECK: 00000018 00000010 0000001c FDE cie=0000001c pc=00000000...00000000 +# CHECK-NEXT: DW_CFA_GNU_args_size: +16 +# 
CHECK-NEXT: DW_CFA_nop: + +.text +.globl foo +.type foo,@function +foo: + .cfi_startproc + .cfi_escape 0x2e, 0x10 + .cfi_endproc diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll 2017-10-17 14:41:15.000000000 +0000 @@ -0,0 +1,32 @@ +; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%ArrayImpl = type { i64, i64 addrspace(100)*, [1 x i64], [1 x i64], [1 x i64], i64, i64, double addrspace(100)*, double addrspace(100)*, i8, i64 } + +; Function Attrs: readnone +declare %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)*) #0 + +; Function Attrs: readnone +declare i64* @getaddr_i64(i64 addrspace(100)*) #0 + +; Make sure that the test compiles without a crash. 
+; Bug https://bugs.llvm.org/show_bug.cgi?id=34937 + +define hidden void @wrapon_fn173() { + +; CHECK-LABEL: @wrapon_fn173 + +entry: + %0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef) + br label %loop + +loop: + %1 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef) + %2 = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8 + %3 = call i64* @getaddr_i64(i64 addrspace(100)* %2) + br label %loop +} + +attributes #0 = { readnone } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/GVN/PRE/pre-load-guards.ll llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/GVN/PRE/pre-load-guards.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/GVN/PRE/pre-load-guards.ll 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/GVN/PRE/pre-load-guards.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,146 +0,0 @@ -; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -declare void @llvm.experimental.guard(i1, ...) - -; This is a motivating example on why we prohibit hoisting through guards. -; In the bottom block, we check that the index is within bounds and only access -; the element in this case and deoptimize otherwise. If we hoist the load to a -; place above the guard, it will may lead to out-of-bound array access. 
-define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) { -; CHECK-LABEL: @test_motivation( -block1: - %el1 = getelementptr inbounds i32, i32* %q, i32 %index - %el2 = getelementptr inbounds i32, i32* %p, i32 %index - br i1 %C, label %block2, label %block3 - -block2: - -; CHECK: block2: -; CHECK-NEXT: br -; CHECK-NOT: load -; CHECK-NOT: sge -; CHECK-NOT: slt -; CHECK-NOT: and - br label %block4 - -block3: - store i32 0, i32* %el1 - br label %block4 - -block4: - -; CHECK: block4: -; CHECK: %cond1 = icmp sge i32 %index, 0 -; CHECK-NEXT: %cond2 = icmp slt i32 %index, %len -; CHECK-NEXT: %in.bounds = and i1 %cond1, %cond2 -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) -; CHECK-NEXT: %PRE = load i32, i32* %P2 -; CHECK: ret i32 %PRE - - %P2 = phi i32* [%el2, %block3], [%el1, %block2] - %cond1 = icmp sge i32 %index, 0 - %cond2 = icmp slt i32 %index, %len - %in.bounds = and i1 %cond1, %cond2 - call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ] - %PRE = load i32, i32* %P2 - ret i32 %PRE -} - -; Guard in load's block that is above the load should prohibit the PRE. -define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) { -; CHECK-LABEL: @test_guard_01( -block1: - br i1 %C, label %block2, label %block3 - -block2: - -; CHECK: block2: -; CHECK-NEXT: br -; CHECK-NOT: load - - br label %block4 - -block3: - store i32 0, i32* %p - br label %block4 - -block4: - -; CHECK: block4: -; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %G) -; CHECK-NEXT: load -; CHECK: ret i32 - - %P2 = phi i32* [%p, %block3], [%q, %block2] - call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] - %PRE = load i32, i32* %P2 - ret i32 %PRE -} - -; Guard in load's block that is below the load should not prohibit the PRE. 
-define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) { -; CHECK-LABEL: @test_guard_02( -block1: - br i1 %C, label %block2, label %block3 - -block2: - -; CHECK: block2: -; CHECK-NEXT: load i32, i32* %q - - br label %block4 - -block3: - store i32 0, i32* %p - br label %block4 - -block4: - -; CHECK: block4: -; CHECK-NEXT: phi i32 [ -; CHECK-NEXT: phi i32* [ -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G) -; CHECK-NOT: load -; CHECK: ret i32 - - %P2 = phi i32* [%p, %block3], [%q, %block2] - %PRE = load i32, i32* %P2 - call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] - ret i32 %PRE -} - -; Guard above the load's block should prevent PRE from hoisting through it. -define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) { -; CHECK-LABEL: @test_guard_03( -block1: - br i1 %C, label %block2, label %block3 - -block2: - -; CHECK: block2: -; CHECK-NEXT: br -; CHECK-NOT: load - - br label %block4 - -block3: - store i32 0, i32* %p - br label %block4 - -block4: - -; CHECK: block4: -; CHECK-NEXT: phi i32* -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %G) -; CHECK-NEXT: load -; CHECK-NEXT: ret i32 - - %P2 = phi i32* [%p, %block3], [%q, %block2] - call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ] - br label %block5 - -block5: - %PRE = load i32, i32* %P2 - ret i32 %PRE -} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/GVN/PRE/pre-load.ll llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/GVN/PRE/pre-load.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/GVN/PRE/pre-load.ll 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/GVN/PRE/pre-load.ll 2017-10-17 14:41:15.000000000 +0000 @@ -430,31 +430,3 @@ call void @g(i32 %NOTPRE) cleanupret from %c2 unwind to caller } - -; Don't PRE load across calls. 
- -define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) { -; CHECK-LABEL: @test13( -; CHECK: entry: -; CHECK-NEXT: icmp eq -; CHECK-NEXT: br i1 -entry: - %tobool = icmp eq i32 %a, 0 - br i1 %tobool, label %if.end, label %if.then - -; CHECK: if.then: -; CHECK-NEXT: load i32 -; CHECK-NEXT: store i32 -if.then: - %uu = load i32, i32* %x, align 4 - store i32 %uu, i32* %r, align 4 - br label %if.end - -; CHECK: if.end: -; CHECK-NEXT: call void @f() -; CHECK-NEXT: load i32 -if.end: - call void @f() - %vv = load i32, i32* %x, align 4 - ret i32 %vv -} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/InstCombine/minmax-fold.ll llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/InstCombine/minmax-fold.ll --- llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/InstCombine/minmax-fold.ll 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/InstCombine/minmax-fold.ll 2017-10-17 14:41:15.000000000 +0000 @@ -404,8 +404,8 @@ ; CHECK-LABEL: @clamp_signed3( ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X:%.*]], 255 ; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], 15 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 [[MIN]], i32 15 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[MIN]], 15 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15 ; CHECK-NEXT: ret i32 [[R]] ; %cmp2 = icmp slt i32 %x, 255 @@ -421,8 +421,8 @@ ; CHECK-LABEL: @clamp_signed4( ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X:%.*]], 15 ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], 255 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 [[MAX]], i32 255 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MAX]], 255 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255 ; CHECK-NEXT: ret i32 [[R]] ; %cmp2 = icmp sgt i32 %x, 15 @@ -472,8 +472,8 @@ ; CHECK-LABEL: 
@clamp_unsigned3( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], 255 ; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[X]], 15 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 [[MIN]], i32 15 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MIN]], 15 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15 ; CHECK-NEXT: ret i32 [[R]] ; %cmp2 = icmp ult i32 %x, 255 @@ -489,8 +489,8 @@ ; CHECK-LABEL: @clamp_unsigned4( ; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], 15 ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X]], 255 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], i32 [[MAX]], i32 255 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[MAX]], 255 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255 ; CHECK-NEXT: ret i32 [[R]] ; %cmp2 = icmp ugt i32 %x, 15 @@ -507,7 +507,7 @@ ; CHECK-LABEL: @clamp_check_for_no_infinite_loop1( ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I:%.*]], 255 ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[I]], i32 255 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[I]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SEL1]], 0 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i32 [[SEL1]], i32 0 ; CHECK-NEXT: ret i32 [[RES]] ; @@ -523,8 +523,8 @@ ; CHECK-LABEL: @clamp_check_for_no_infinite_loop2( ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[I:%.*]], -255 ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[I]], i32 -255 -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 0 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP2]], i32 [[SEL1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[SEL1]], 0 +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i32 [[SEL1]], i32 0 ; CHECK-NEXT: ret i32 [[RES]] ; %cmp1 = icmp sgt i32 %i, -255 diff -Nru llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/ObjCARC/basic.ll llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/ObjCARC/basic.ll --- 
llvm-toolchain-snapshot-6.0~svn315865/test/Transforms/ObjCARC/basic.ll 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/test/Transforms/ObjCARC/basic.ll 2017-10-17 14:41:15.000000000 +0000 @@ -1345,6 +1345,26 @@ C: %h = phi double* [ null, %A ], [ %p, %B ] %c = bitcast double* %h to i8* + call void @objc_release(i8* %c), !clang.imprecise_release !0 + ret void +} + +; Do not move an objc_release that doesn't have the clang.imprecise_release tag. + +; CHECK-LABEL: define void @test22_precise( +; CHECK: %[[P0:.*]] = phi double* +; CHECK: %[[V0:.*]] = bitcast double* %[[P0]] to i8* +; CHECK: call void @objc_release(i8* %[[V0]]) +; CHECK: ret void +define void @test22_precise(double* %p, i1 %a) { + br i1 %a, label %A, label %B +A: + br label %C +B: + br label %C +C: + %h = phi double* [ null, %A ], [ %p, %B ] + %c = bitcast double* %h to i8* call void @objc_release(i8* %c) ret void } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/tools/llvm-cov/CoverageSummaryInfo.cpp llvm-toolchain-snapshot-6.0~svn316003/tools/llvm-cov/CoverageSummaryInfo.cpp --- llvm-toolchain-snapshot-6.0~svn315865/tools/llvm-cov/CoverageSummaryInfo.cpp 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/tools/llvm-cov/CoverageSummaryInfo.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -43,23 +43,15 @@ if (!Mapped) return; - // Pick the max count among regions which start and end on this line, to - // avoid erroneously using the wrapped count, and to avoid picking region - // counts which come from deferred regions. - if (LineSegments.size() > 1) { - for (unsigned I = 0; I < LineSegments.size() - 1; ++I) { - if (!LineSegments[I]->IsGapRegion) - ExecutionCount = std::max(ExecutionCount, LineSegments[I]->Count); - } + // Pick the max count from the non-gap, region entry segments. If there + // aren't any, use the wrapped count. 
+ if (!MinRegionCount) { + ExecutionCount = WrappedSegment->Count; return; } - - // If a non-gap region starts here, use its count. Otherwise use the wrapped - // count. - if (MinRegionCount == 1) - ExecutionCount = LineSegments[0]->Count; - else - ExecutionCount = WrappedSegment->Count; + for (const auto *LS : LineSegments) + if (isStartOfRegion(LS)) + ExecutionCount = std::max(ExecutionCount, LS->Count); } LineCoverageIterator &LineCoverageIterator::operator++() { diff -Nru llvm-toolchain-snapshot-6.0~svn315865/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp llvm-toolchain-snapshot-6.0~svn316003/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp --- llvm-toolchain-snapshot-6.0~svn315865/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -116,7 +116,7 @@ auto M = parseModule(Data, Size, Context); if (!M || verifyModule(*M, &errs())) { errs() << "error: input module is broken!\n"; - return 1; + return 0; } // Set up the module to build for our target. 
diff -Nru llvm-toolchain-snapshot-6.0~svn315865/unittests/Analysis/CMakeLists.txt llvm-toolchain-snapshot-6.0~svn316003/unittests/Analysis/CMakeLists.txt --- llvm-toolchain-snapshot-6.0~svn315865/unittests/Analysis/CMakeLists.txt 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/unittests/Analysis/CMakeLists.txt 2017-10-17 14:41:16.000000000 +0000 @@ -22,6 +22,7 @@ OrderedBasicBlockTest.cpp ProfileSummaryInfoTest.cpp ScalarEvolutionTest.cpp + SparsePropagation.cpp TargetLibraryInfoTest.cpp TBAATest.cpp UnrollAnalyzer.cpp diff -Nru llvm-toolchain-snapshot-6.0~svn315865/unittests/Analysis/ScalarEvolutionTest.cpp llvm-toolchain-snapshot-6.0~svn316003/unittests/Analysis/ScalarEvolutionTest.cpp --- llvm-toolchain-snapshot-6.0~svn315865/unittests/Analysis/ScalarEvolutionTest.cpp 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/unittests/Analysis/ScalarEvolutionTest.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -24,19 +24,11 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/SourceMgr.h" -#include "gmock/gmock.h" #include "gtest/gtest.h" namespace llvm { namespace { -MATCHER_P3(IsAffineAddRec, S, X, L, "") { - if (auto *AR = dyn_cast(arg)) - return AR->isAffine() && AR->getLoop() == L && AR->getOperand(0) == S && - AR->getOperand(1) == X; - return false; -} - // We use this fixture to ensure that we clean up ScalarEvolution before // deleting the PassManager. class ScalarEvolutionsTest : public testing::Test { @@ -894,6 +886,90 @@ 2004u); } +// Make sure that SCEV invalidates exit limits after invalidating the values it +// depends on when we forget a value. 
+TEST_F(ScalarEvolutionsTest, SCEVExitLimitForgetValue) { + /* + * Create the following code: + * func(i64 addrspace(10)* %arg) + * top: + * br label %L.ph + * L.ph: + * %load = load i64 addrspace(10)* %arg + * br label %L + * L: + * %phi = phi i64 [i64 0, %L.ph], [ %add, %L2 ] + * %add = add i64 %phi2, 1 + * %cond = icmp slt i64 %add, %load ; then becomes 2000. + * br i1 %cond, label %post, label %L2 + * post: + * ret void + * + */ + + // Create a module with non-integral pointers in it's datalayout + Module NIM("nonintegral", Context); + std::string DataLayout = M.getDataLayoutStr(); + if (!DataLayout.empty()) + DataLayout += "-"; + DataLayout += "ni:10"; + NIM.setDataLayout(DataLayout); + + Type *T_int64 = Type::getInt64Ty(Context); + Type *T_pint64 = T_int64->getPointerTo(10); + + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(Context), {T_pint64}, false); + Function *F = cast(NIM.getOrInsertFunction("foo", FTy)); + + Argument *Arg = &*F->arg_begin(); + + BasicBlock *Top = BasicBlock::Create(Context, "top", F); + BasicBlock *LPh = BasicBlock::Create(Context, "L.ph", F); + BasicBlock *L = BasicBlock::Create(Context, "L", F); + BasicBlock *Post = BasicBlock::Create(Context, "post", F); + + IRBuilder<> Builder(Top); + Builder.CreateBr(LPh); + + Builder.SetInsertPoint(LPh); + auto *Load = cast(Builder.CreateLoad(T_int64, Arg, "load")); + Builder.CreateBr(L); + + Builder.SetInsertPoint(L); + PHINode *Phi = Builder.CreatePHI(T_int64, 2); + auto *Add = cast( + Builder.CreateAdd(Phi, ConstantInt::get(T_int64, 1), "add")); + auto *Cond = cast( + Builder.CreateICmp(ICmpInst::ICMP_SLT, Add, Load, "cond")); + auto *Br = cast(Builder.CreateCondBr(Cond, L, Post)); + Phi->addIncoming(ConstantInt::get(T_int64, 0), LPh); + Phi->addIncoming(Add, L); + + Builder.SetInsertPoint(Post); + Builder.CreateRetVoid(); + + ScalarEvolution SE = buildSE(*F); + auto *Loop = LI->getLoopFor(L); + const SCEV *EC = SE.getBackedgeTakenCount(Loop); + EXPECT_FALSE(isa(EC)); + 
EXPECT_FALSE(isa(EC)); + + SE.forgetValue(Load); + Br->eraseFromParent(); + Cond->eraseFromParent(); + Load->eraseFromParent(); + + Builder.SetInsertPoint(L); + auto *NewCond = Builder.CreateICmp( + ICmpInst::ICMP_SLT, Add, ConstantInt::get(T_int64, 2000), "new.cond"); + Builder.CreateCondBr(NewCond, L, Post); + const SCEV *NewEC = SE.getBackedgeTakenCount(Loop); + EXPECT_FALSE(isa(NewEC)); + EXPECT_TRUE(isa(NewEC)); + EXPECT_EQ(cast(NewEC)->getAPInt().getLimitedValue(), 1999u); +} + TEST_F(ScalarEvolutionsTest, SCEVAddRecFromPHIwithLargeConstants) { // Reference: https://reviews.llvm.org/D37265 // Make sure that SCEV does not blow up when constructing an AddRec @@ -1006,75 +1082,6 @@ auto Result = SE.createAddRecFromPHIWithCasts(cast(Expr)); } -TEST_F(ScalarEvolutionsTest, SCEVForgetDependentLoop) { - LLVMContext C; - SMDiagnostic Err; - std::unique_ptr M = parseAssemblyString( - "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" " - " " - "define void @f(i32 %first_limit, i1* %cond) { " - "entry: " - " br label %first_loop.ph " - " " - "first_loop.ph: " - " br label %first_loop " - " " - "first_loop: " - " %iv_first = phi i32 [0, %first_loop.ph], [%iv_first.inc, %first_loop] " - " %iv_first.inc = add i32 %iv_first, 1 " - " %known_cond = icmp slt i32 %iv_first, 2000 " - " %unknown_cond = load volatile i1, i1* %cond " - " br i1 %unknown_cond, label %first_loop, label %first_loop.exit " - " " - "first_loop.exit: " - " %iv_first.3x = mul i32 %iv_first, 3 " - " %iv_first.5x = mul i32 %iv_first, 5 " - " br label %second_loop.ph " - " " - "second_loop.ph: " - " br label %second_loop " - " " - "second_loop: " - " %iv_second = phi i32 [%iv_first.3x, %second_loop.ph], [%iv_second.inc, %second_loop] " - " %iv_second.inc = add i32 %iv_second, 1 " - " %second_loop.cond = icmp ne i32 %iv_second, %iv_first.5x " - " br i1 %second_loop.cond, label %second_loop, label %second_loop.exit " - " " - "second_loop.exit: " - " ret void " - "} " - " ", - Err, C); - - 
assert(M && "Could not parse module?"); - assert(!verifyModule(*M) && "Must have been well formed!"); - - runWithSE(*M, "f", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { - auto &FirstIV = GetInstByName(F, "iv_first"); - auto &SecondIV = GetInstByName(F, "iv_second"); - - auto *FirstLoop = LI.getLoopFor(FirstIV.getParent()); - auto *SecondLoop = LI.getLoopFor(SecondIV.getParent()); - - auto *Zero = SE.getZero(FirstIV.getType()); - auto *Two = SE.getConstant(APInt(32, 2)); - - EXPECT_EQ(SE.getBackedgeTakenCount(FirstLoop), SE.getCouldNotCompute()); - EXPECT_THAT(SE.getBackedgeTakenCount(SecondLoop), - IsAffineAddRec(Zero, Two, FirstLoop)); - - auto &UnknownCond = GetInstByName(F, "unknown_cond"); - auto &KnownCond = GetInstByName(F, "known_cond"); - - UnknownCond.replaceAllUsesWith(&KnownCond); - - SE.forgetLoop(FirstLoop); - - EXPECT_EQ(SE.getBackedgeTakenCount(FirstLoop), SE.getConstant(APInt(32, 2000))); - EXPECT_EQ(SE.getBackedgeTakenCount(SecondLoop), SE.getConstant(APInt(32, 4000))); - }); -} - TEST_F(ScalarEvolutionsTest, SCEVFoldSumOfTruncs) { // Verify that the following SCEV gets folded to a zero: // (-1 * (trunc i64 (-1 * %0) to i32)) + (-1 * (trunc i64 %0 to i32) diff -Nru llvm-toolchain-snapshot-6.0~svn315865/unittests/Analysis/SparsePropagation.cpp llvm-toolchain-snapshot-6.0~svn316003/unittests/Analysis/SparsePropagation.cpp --- llvm-toolchain-snapshot-6.0~svn315865/unittests/Analysis/SparsePropagation.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/unittests/Analysis/SparsePropagation.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -0,0 +1,544 @@ +//===- SparsePropagation.cpp - Unit tests for the generic solver ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/SparsePropagation.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/IRBuilder.h" +#include "gtest/gtest.h" +using namespace llvm; + +namespace { +/// To enable interprocedural analysis, we assign LLVM values to the following +/// groups. The register group represents SSA registers, the return group +/// represents the return values of functions, and the memory group represents +/// in-memory values. An LLVM Value can technically be in more than one group. +/// It's necessary to distinguish these groups so we can, for example, track a +/// global variable separately from the value stored at its location. +enum class IPOGrouping { Register, Return, Memory }; + +/// Our LatticeKeys are PointerIntPairs composed of LLVM values and groupings. +/// The PointerIntPair header provides a DenseMapInfo specialization, so using +/// these as LatticeKeys is fine. +using TestLatticeKey = PointerIntPair; +} // namespace + +namespace llvm { +/// A specialization of LatticeKeyInfo for TestLatticeKeys. The generic solver +/// must translate between LatticeKeys and LLVM Values when adding Values to +/// its work list and inspecting the state of control-flow related values. +template <> struct LatticeKeyInfo { + static inline Value *getValueFromLatticeKey(TestLatticeKey Key) { + return Key.getPointer(); + } + static inline TestLatticeKey getLatticeKeyFromValue(Value *V) { + return TestLatticeKey(V, IPOGrouping::Register); + } +}; +} // namespace llvm + +namespace { +/// This class defines a simple test lattice value that could be used for +/// solving problems similar to constant propagation. The value is maintained +/// as a PointerIntPair. +class TestLatticeVal { +public: + /// The states of the lattices value. Only the ConstantVal state is + /// interesting; the rest are special states used by the generic solver. 
The + /// UntrackedVal state differs from the other three in that the generic + /// solver uses it to avoid doing unnecessary work. In particular, when a + /// value moves to the UntrackedVal state, it's users are not notified. + enum TestLatticeStateTy { + UndefinedVal, + ConstantVal, + OverdefinedVal, + UntrackedVal + }; + + TestLatticeVal() : LatticeVal(nullptr, UndefinedVal) {} + TestLatticeVal(Constant *C, TestLatticeStateTy State) + : LatticeVal(C, State) {} + + /// Return true if this lattice value is in the Constant state. This is used + /// for checking the solver results. + bool isConstant() const { return LatticeVal.getInt() == ConstantVal; } + + /// Return true if this lattice value is in the Overdefined state. This is + /// used for checking the solver results. + bool isOverdefined() const { return LatticeVal.getInt() == OverdefinedVal; } + + bool operator==(const TestLatticeVal &RHS) const { + return LatticeVal == RHS.LatticeVal; + } + + bool operator!=(const TestLatticeVal &RHS) const { + return LatticeVal != RHS.LatticeVal; + } + +private: + /// A simple lattice value type for problems similar to constant propagation. + /// It holds the constant value and the lattice state. + PointerIntPair LatticeVal; +}; + +/// This class defines a simple test lattice function that could be used for +/// solving problems similar to constant propagation. The test lattice differs +/// from a "real" lattice in a few ways. First, it initializes all return +/// values, values stored in global variables, and arguments in the undefined +/// state. This means that there are no limitations on what we can track +/// interprocedurally. For simplicity, all global values in the tests will be +/// given internal linkage, since this is not something this lattice function +/// tracks. Second, it only handles the few instructions necessary for the +/// tests. 
+class TestLatticeFunc + : public AbstractLatticeFunction { +public: + /// Construct a new test lattice function with special values for the + /// Undefined, Overdefined, and Untracked states. + TestLatticeFunc() + : AbstractLatticeFunction( + TestLatticeVal(nullptr, TestLatticeVal::UndefinedVal), + TestLatticeVal(nullptr, TestLatticeVal::OverdefinedVal), + TestLatticeVal(nullptr, TestLatticeVal::UntrackedVal)) {} + + /// Compute and return a TestLatticeVal for the given TestLatticeKey. For the + /// test analysis, a LatticeKey will begin in the undefined state, unless it + /// represents an LLVM Constant in the register grouping. + TestLatticeVal ComputeLatticeVal(TestLatticeKey Key) override { + if (Key.getInt() == IPOGrouping::Register) + if (auto *C = dyn_cast(Key.getPointer())) + return TestLatticeVal(C, TestLatticeVal::ConstantVal); + return getUndefVal(); + } + + /// Merge the two given lattice values. This merge should be equivalent to + /// what is done for constant propagation. That is, the resulting lattice + /// value is constant only if the two given lattice values are constant and + /// hold the same value. + TestLatticeVal MergeValues(TestLatticeVal X, TestLatticeVal Y) override { + if (X == getUntrackedVal() || Y == getUntrackedVal()) + return getUntrackedVal(); + if (X == getOverdefinedVal() || Y == getOverdefinedVal()) + return getOverdefinedVal(); + if (X == getUndefVal() && Y == getUndefVal()) + return getUndefVal(); + if (X == getUndefVal()) + return Y; + if (Y == getUndefVal()) + return X; + if (X == Y) + return X; + return getOverdefinedVal(); + } + + /// Compute the lattice values that change as a result of executing the given + /// instruction. We only handle the few instructions needed for the tests. 
+ void ComputeInstructionState( + Instruction &I, DenseMap &ChangedValues, + SparseSolver &SS) override { + switch (I.getOpcode()) { + case Instruction::Call: + return visitCallSite(cast(&I), ChangedValues, SS); + case Instruction::Ret: + return visitReturn(*cast(&I), ChangedValues, SS); + case Instruction::Store: + return visitStore(*cast(&I), ChangedValues, SS); + default: + return visitInst(I, ChangedValues, SS); + } + } + +private: + /// Handle call sites. The state of a called function's argument is the merge + /// of the current formal argument state with the call site's corresponding + /// actual argument state. The call site state is the merge of the call site + /// state with the returned value state of the called function. + void visitCallSite(CallSite CS, + DenseMap &ChangedValues, + SparseSolver &SS) { + Function *F = CS.getCalledFunction(); + Instruction *I = CS.getInstruction(); + auto RegI = TestLatticeKey(I, IPOGrouping::Register); + if (!F) { + ChangedValues[RegI] = getOverdefinedVal(); + return; + } + SS.MarkBlockExecutable(&F->front()); + for (Argument &A : F->args()) { + auto RegFormal = TestLatticeKey(&A, IPOGrouping::Register); + auto RegActual = + TestLatticeKey(CS.getArgument(A.getArgNo()), IPOGrouping::Register); + ChangedValues[RegFormal] = + MergeValues(SS.getValueState(RegFormal), SS.getValueState(RegActual)); + } + auto RetF = TestLatticeKey(F, IPOGrouping::Return); + ChangedValues[RegI] = + MergeValues(SS.getValueState(RegI), SS.getValueState(RetF)); + } + + /// Handle return instructions. The function's return state is the merge of + /// the returned value state and the function's current return state. 
+ void visitReturn(ReturnInst &I, + DenseMap &ChangedValues, + SparseSolver &SS) { + Function *F = I.getParent()->getParent(); + if (F->getReturnType()->isVoidTy()) + return; + auto RegR = TestLatticeKey(I.getReturnValue(), IPOGrouping::Register); + auto RetF = TestLatticeKey(F, IPOGrouping::Return); + ChangedValues[RetF] = + MergeValues(SS.getValueState(RegR), SS.getValueState(RetF)); + } + + /// Handle store instructions. If the pointer operand of the store is a + /// global variable, we attempt to track the value. The global variable state + /// is the merge of the stored value state with the current global variable + /// state. + void visitStore(StoreInst &I, + DenseMap &ChangedValues, + SparseSolver &SS) { + auto *GV = dyn_cast(I.getPointerOperand()); + if (!GV) + return; + auto RegVal = TestLatticeKey(I.getValueOperand(), IPOGrouping::Register); + auto MemPtr = TestLatticeKey(GV, IPOGrouping::Memory); + ChangedValues[MemPtr] = + MergeValues(SS.getValueState(RegVal), SS.getValueState(MemPtr)); + } + + /// Handle all other instructions. All other instructions are marked + /// overdefined. + void visitInst(Instruction &I, + DenseMap &ChangedValues, + SparseSolver &SS) { + auto RegI = TestLatticeKey(&I, IPOGrouping::Register); + ChangedValues[RegI] = getOverdefinedVal(); + } +}; + +/// This class defines the common data used for all of the tests. The tests +/// should add code to the module and then run the solver. +class SparsePropagationTest : public testing::Test { +protected: + LLVMContext Context; + Module M; + IRBuilder<> Builder; + TestLatticeFunc Lattice; + SparseSolver Solver; + +public: + SparsePropagationTest() + : M("", Context), Builder(Context), Solver(&Lattice) {} +}; +} // namespace + +/// Test that we mark discovered functions executable. 
+/// +/// define internal void @f() { +/// call void @g() +/// ret void +/// } +/// +/// define internal void @g() { +/// call void @f() +/// ret void +/// } +/// +/// For this test, we initially mark "f" executable, and the solver discovers +/// "g" because of the call in "f". The mutually recursive call in "g" also +/// tests that we don't add a block to the basic block work list if it is +/// already executable. Doing so would put the solver into an infinite loop. +TEST_F(SparsePropagationTest, MarkBlockExecutable) { + Function *F = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "f", &M); + Function *G = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "g", &M); + BasicBlock *FEntry = BasicBlock::Create(Context, "", F); + BasicBlock *GEntry = BasicBlock::Create(Context, "", G); + Builder.SetInsertPoint(FEntry); + Builder.CreateCall(G); + Builder.CreateRetVoid(); + Builder.SetInsertPoint(GEntry); + Builder.CreateCall(F); + Builder.CreateRetVoid(); + + Solver.MarkBlockExecutable(FEntry); + Solver.Solve(); + + EXPECT_TRUE(Solver.isBlockExecutable(GEntry)); +} + +/// Test that we propagate information through global variables. +/// +/// @gv = internal global i64 +/// +/// define internal void @f() { +/// store i64 1, i64* @gv +/// ret void +/// } +/// +/// define internal void @g() { +/// store i64 1, i64* @gv +/// ret void +/// } +/// +/// For this test, we initially mark both "f" and "g" executable, and the +/// solver computes the lattice state of the global variable as constant. 
+TEST_F(SparsePropagationTest, GlobalVariableConstant) { + Function *F = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "f", &M); + Function *G = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "g", &M); + GlobalVariable *GV = + new GlobalVariable(M, Builder.getInt64Ty(), false, + GlobalValue::InternalLinkage, nullptr, "gv"); + BasicBlock *FEntry = BasicBlock::Create(Context, "", F); + BasicBlock *GEntry = BasicBlock::Create(Context, "", G); + Builder.SetInsertPoint(FEntry); + Builder.CreateStore(Builder.getInt64(1), GV); + Builder.CreateRetVoid(); + Builder.SetInsertPoint(GEntry); + Builder.CreateStore(Builder.getInt64(1), GV); + Builder.CreateRetVoid(); + + Solver.MarkBlockExecutable(FEntry); + Solver.MarkBlockExecutable(GEntry); + Solver.Solve(); + + auto MemGV = TestLatticeKey(GV, IPOGrouping::Memory); + EXPECT_TRUE(Solver.getExistingValueState(MemGV).isConstant()); +} + +/// Test that we propagate information through global variables. +/// +/// @gv = internal global i64 +/// +/// define internal void @f() { +/// store i64 0, i64* @gv +/// ret void +/// } +/// +/// define internal void @g() { +/// store i64 1, i64* @gv +/// ret void +/// } +/// +/// For this test, we initially mark both "f" and "g" executable, and the +/// solver computes the lattice state of the global variable as overdefined. 
+TEST_F(SparsePropagationTest, GlobalVariableOverDefined) { + Function *F = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "f", &M); + Function *G = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "g", &M); + GlobalVariable *GV = + new GlobalVariable(M, Builder.getInt64Ty(), false, + GlobalValue::InternalLinkage, nullptr, "gv"); + BasicBlock *FEntry = BasicBlock::Create(Context, "", F); + BasicBlock *GEntry = BasicBlock::Create(Context, "", G); + Builder.SetInsertPoint(FEntry); + Builder.CreateStore(Builder.getInt64(0), GV); + Builder.CreateRetVoid(); + Builder.SetInsertPoint(GEntry); + Builder.CreateStore(Builder.getInt64(1), GV); + Builder.CreateRetVoid(); + + Solver.MarkBlockExecutable(FEntry); + Solver.MarkBlockExecutable(GEntry); + Solver.Solve(); + + auto MemGV = TestLatticeKey(GV, IPOGrouping::Memory); + EXPECT_TRUE(Solver.getExistingValueState(MemGV).isOverdefined()); +} + +/// Test that we propagate information through function returns. +/// +/// define internal i64 @f(i1* %cond) { +/// if: +/// %0 = load i1, i1* %cond +/// br i1 %0, label %then, label %else +/// +/// then: +/// ret i64 1 +/// +/// else: +/// ret i64 1 +/// } +/// +/// For this test, we initially mark "f" executable, and the solver computes +/// the return value of the function as constant. 
+TEST_F(SparsePropagationTest, FunctionDefined) { + Function *F = + Function::Create(FunctionType::get(Builder.getInt64Ty(), + {Type::getInt1PtrTy(Context)}, false), + GlobalValue::InternalLinkage, "f", &M); + BasicBlock *If = BasicBlock::Create(Context, "if", F); + BasicBlock *Then = BasicBlock::Create(Context, "then", F); + BasicBlock *Else = BasicBlock::Create(Context, "else", F); + F->arg_begin()->setName("cond"); + Builder.SetInsertPoint(If); + LoadInst *Cond = Builder.CreateLoad(F->arg_begin()); + Builder.CreateCondBr(Cond, Then, Else); + Builder.SetInsertPoint(Then); + Builder.CreateRet(Builder.getInt64(1)); + Builder.SetInsertPoint(Else); + Builder.CreateRet(Builder.getInt64(1)); + + Solver.MarkBlockExecutable(If); + Solver.Solve(); + + auto RetF = TestLatticeKey(F, IPOGrouping::Return); + EXPECT_TRUE(Solver.getExistingValueState(RetF).isConstant()); +} + +/// Test that we propagate information through function returns. +/// +/// define internal i64 @f(i1* %cond) { +/// if: +/// %0 = load i1, i1* %cond +/// br i1 %0, label %then, label %else +/// +/// then: +/// ret i64 0 +/// +/// else: +/// ret i64 1 +/// } +/// +/// For this test, we initially mark "f" executable, and the solver computes +/// the return value of the function as overdefined. 
+TEST_F(SparsePropagationTest, FunctionOverDefined) { + Function *F = + Function::Create(FunctionType::get(Builder.getInt64Ty(), + {Type::getInt1PtrTy(Context)}, false), + GlobalValue::InternalLinkage, "f", &M); + BasicBlock *If = BasicBlock::Create(Context, "if", F); + BasicBlock *Then = BasicBlock::Create(Context, "then", F); + BasicBlock *Else = BasicBlock::Create(Context, "else", F); + F->arg_begin()->setName("cond"); + Builder.SetInsertPoint(If); + LoadInst *Cond = Builder.CreateLoad(F->arg_begin()); + Builder.CreateCondBr(Cond, Then, Else); + Builder.SetInsertPoint(Then); + Builder.CreateRet(Builder.getInt64(0)); + Builder.SetInsertPoint(Else); + Builder.CreateRet(Builder.getInt64(1)); + + Solver.MarkBlockExecutable(If); + Solver.Solve(); + + auto RetF = TestLatticeKey(F, IPOGrouping::Return); + EXPECT_TRUE(Solver.getExistingValueState(RetF).isOverdefined()); +} + +/// Test that we propagate information through arguments. +/// +/// define internal void @f() { +/// call void @g(i64 0, i64 1) +/// call void @g(i64 1, i64 1) +/// ret void +/// } +/// +/// define internal void @g(i64 %a, i64 %b) { +/// ret void +/// } +/// +/// For this test, we initially mark "f" executable, and the solver discovers +/// "g" because of the calls in "f". The solver computes the state of argument +/// "a" as overdefined and the state of "b" as constant. +/// +/// In addition, this test demonstrates that ComputeInstructionState can alter +/// the state of multiple lattice values, in addition to the one associated +/// with the instruction definition. Each call instruction in this test updates +/// the state of arguments "a" and "b". 
+TEST_F(SparsePropagationTest, ComputeInstructionState) { + Function *F = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "f", &M); + Function *G = Function::Create( + FunctionType::get(Builder.getVoidTy(), + {Builder.getInt64Ty(), Builder.getInt64Ty()}, false), + GlobalValue::InternalLinkage, "g", &M); + Argument *A = G->arg_begin(); + Argument *B = std::next(G->arg_begin()); + A->setName("a"); + B->setName("b"); + BasicBlock *FEntry = BasicBlock::Create(Context, "", F); + BasicBlock *GEntry = BasicBlock::Create(Context, "", G); + Builder.SetInsertPoint(FEntry); + Builder.CreateCall(G, {Builder.getInt64(0), Builder.getInt64(1)}); + Builder.CreateCall(G, {Builder.getInt64(1), Builder.getInt64(1)}); + Builder.CreateRetVoid(); + Builder.SetInsertPoint(GEntry); + Builder.CreateRetVoid(); + + Solver.MarkBlockExecutable(FEntry); + Solver.Solve(); + + auto RegA = TestLatticeKey(A, IPOGrouping::Register); + auto RegB = TestLatticeKey(B, IPOGrouping::Register); + EXPECT_TRUE(Solver.getExistingValueState(RegA).isOverdefined()); + EXPECT_TRUE(Solver.getExistingValueState(RegB).isConstant()); +} + +/// Test that we can handle exceptional terminator instructions. +/// +/// declare internal void @p() +/// +/// declare internal void @g() +/// +/// define internal void @f() personality i8* bitcast (void ()* @p to i8*) { +/// entry: +/// invoke void @g() +/// to label %exit unwind label %catch.pad +/// +/// catch.pad: +/// %0 = catchswitch within none [label %catch.body] unwind to caller +/// +/// catch.body: +/// %1 = catchpad within %0 [] +/// catchret from %1 to label %exit +/// +/// exit: +/// ret void +/// } +/// +/// For this test, we initially mark the entry block executable. The solver +/// then discovers the rest of the blocks in the function are executable. 
+TEST_F(SparsePropagationTest, ExceptionalTerminatorInsts) { + Function *P = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "p", &M); + Function *G = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "g", &M); + Function *F = Function::Create(FunctionType::get(Builder.getVoidTy(), false), + GlobalValue::InternalLinkage, "f", &M); + Constant *C = + ConstantExpr::getCast(Instruction::BitCast, P, Builder.getInt8PtrTy()); + F->setPersonalityFn(C); + BasicBlock *Entry = BasicBlock::Create(Context, "entry", F); + BasicBlock *Pad = BasicBlock::Create(Context, "catch.pad", F); + BasicBlock *Body = BasicBlock::Create(Context, "catch.body", F); + BasicBlock *Exit = BasicBlock::Create(Context, "exit", F); + Builder.SetInsertPoint(Entry); + Builder.CreateInvoke(G, Exit, Pad); + Builder.SetInsertPoint(Pad); + CatchSwitchInst *CatchSwitch = + Builder.CreateCatchSwitch(ConstantTokenNone::get(Context), nullptr, 1); + CatchSwitch->addHandler(Body); + Builder.SetInsertPoint(Body); + CatchPadInst *CatchPad = Builder.CreateCatchPad(CatchSwitch, {}); + Builder.CreateCatchRet(CatchPad, Exit); + Builder.SetInsertPoint(Exit); + Builder.CreateRetVoid(); + + Solver.MarkBlockExecutable(Entry); + Solver.Solve(); + + EXPECT_TRUE(Solver.isBlockExecutable(Pad)); + EXPECT_TRUE(Solver.isBlockExecutable(Body)); + EXPECT_TRUE(Solver.isBlockExecutable(Exit)); +} diff -Nru llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/CallingConvEmitter.cpp llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/CallingConvEmitter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/CallingConvEmitter.cpp 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/CallingConvEmitter.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -39,21 +39,21 @@ // Emit prototypes for all of the non-custom CC's so that they can forward ref // each other. 
- for (unsigned i = 0, e = CCs.size(); i != e; ++i) { - if (!CCs[i]->getValueAsBit("Custom")) { - O << "static bool " << CCs[i]->getName() + for (Record *CC : CCs) { + if (!CC->getValueAsBit("Custom")) { + O << "static bool " << CC->getName() << "(unsigned ValNo, MVT ValVT,\n" - << std::string(CCs[i]->getName().size() + 13, ' ') + << std::string(CC->getName().size() + 13, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n" - << std::string(CCs[i]->getName().size() + 13, ' ') + << std::string(CC->getName().size() + 13, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State);\n"; } } // Emit each non-custom calling convention description in full. - for (unsigned i = 0, e = CCs.size(); i != e; ++i) { - if (!CCs[i]->getValueAsBit("Custom")) - EmitCallingConv(CCs[i], O); + for (Record *CC : CCs) { + if (!CC->getValueAsBit("Custom")) + EmitCallingConv(CC, O); } } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/CodeGenDAGPatterns.cpp llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/CodeGenDAGPatterns.cpp --- llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/CodeGenDAGPatterns.cpp 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/CodeGenDAGPatterns.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -807,8 +807,14 @@ /// TreePredicateFn constructor. Here 'N' is a subclass of PatFrag. 
TreePredicateFn::TreePredicateFn(TreePattern *N) : PatFragRec(N) { - assert((getPredCode().empty() || getImmCode().empty()) && - ".td file corrupt: can't have a node predicate *and* an imm predicate"); + assert( + (!hasPredCode() || !hasImmCode()) && + ".td file corrupt: can't have a node predicate *and* an imm predicate"); +} + +bool TreePredicateFn::hasPredCode() const { + return isLoad() || isStore() || + !PatFragRec->getRecord()->getValueAsString("PredicateCode").empty(); } std::string TreePredicateFn::getPredCode() const { @@ -933,6 +939,10 @@ return Code; } +bool TreePredicateFn::hasImmCode() const { + return !PatFragRec->getRecord()->getValueAsString("ImmediateCode").empty(); +} + std::string TreePredicateFn::getImmCode() const { return PatFragRec->getRecord()->getValueAsString("ImmediateCode"); } @@ -1015,7 +1025,7 @@ /// isAlwaysTrue - Return true if this is a noop predicate. bool TreePredicateFn::isAlwaysTrue() const { - return getPredCode().empty() && getImmCode().empty(); + return !hasPredCode() && !hasImmCode(); } /// Return the name to use in the generated code to reference this, this is @@ -1085,7 +1095,7 @@ } // Handle arbitrary node predicates. - assert(!getPredCode().empty() && "Don't have any predicate code!"); + assert(hasPredCode() && "Don't have any predicate code!"); StringRef ClassName; if (PatFragRec->getOnlyTree()->isLeaf()) ClassName = "SDNode"; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/CodeGenDAGPatterns.h llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/CodeGenDAGPatterns.h --- llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/CodeGenDAGPatterns.h 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/CodeGenDAGPatterns.h 2017-10-17 14:41:16.000000000 +0000 @@ -447,7 +447,7 @@ /// isAlwaysTrue - Return true if this is a noop predicate. 
bool isAlwaysTrue() const; - bool isImmediatePattern() const { return !getImmCode().empty(); } + bool isImmediatePattern() const { return hasImmCode(); } /// getImmediatePredicateCode - Return the code that evaluates this pattern if /// this is an immediate predicate. It is an error to call this on a @@ -511,6 +511,8 @@ Record *getScalarMemoryVT() const; private: + bool hasPredCode() const; + bool hasImmCode() const; std::string getPredCode() const; std::string getImmCode() const; bool immCodeUsesAPInt() const; diff -Nru llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/DAGISelMatcher.cpp llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/DAGISelMatcher.cpp --- llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/DAGISelMatcher.cpp 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/DAGISelMatcher.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -80,18 +80,18 @@ ScopeMatcher::~ScopeMatcher() { - for (unsigned i = 0, e = Children.size(); i != e; ++i) - delete Children[i]; + for (Matcher *C : Children) + delete C; } SwitchOpcodeMatcher::~SwitchOpcodeMatcher() { - for (unsigned i = 0, e = Cases.size(); i != e; ++i) - delete Cases[i].second; + for (auto &C : Cases) + delete C.second; } SwitchTypeMatcher::~SwitchTypeMatcher() { - for (unsigned i = 0, e = Cases.size(); i != e; ++i) - delete Cases[i].second; + for (auto &C : Cases) + delete C.second; } CheckPredicateMatcher::CheckPredicateMatcher(const TreePredicateFn &pred) @@ -107,11 +107,11 @@ void ScopeMatcher::printImpl(raw_ostream &OS, unsigned indent) const { OS.indent(indent) << "Scope\n"; - for (unsigned i = 0, e = getNumChildren(); i != e; ++i) { - if (!getChild(i)) + for (const Matcher *C : Children) { + if (!C) OS.indent(indent+1) << "NULL POINTER\n"; else - getChild(i)->print(OS, indent+2); + C->print(OS, indent+2); } } @@ -162,9 +162,9 @@ void SwitchOpcodeMatcher::printImpl(raw_ostream &OS, unsigned indent) const { OS.indent(indent) << "SwitchOpcode: {\n"; - for 
(unsigned i = 0, e = Cases.size(); i != e; ++i) { - OS.indent(indent) << "case " << Cases[i].first->getEnumName() << ":\n"; - Cases[i].second->print(OS, indent+2); + for (const auto &C : Cases) { + OS.indent(indent) << "case " << C.first->getEnumName() << ":\n"; + C.second->print(OS, indent+2); } OS.indent(indent) << "}\n"; } @@ -177,9 +177,9 @@ void SwitchTypeMatcher::printImpl(raw_ostream &OS, unsigned indent) const { OS.indent(indent) << "SwitchType: {\n"; - for (unsigned i = 0, e = Cases.size(); i != e; ++i) { - OS.indent(indent) << "case " << getEnumName(Cases[i].first) << ":\n"; - Cases[i].second->print(OS, indent+2); + for (const auto &C : Cases) { + OS.indent(indent) << "case " << getEnumName(C.first) << ":\n"; + C.second->print(OS, indent+2); } OS.indent(indent) << "}\n"; } diff -Nru llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/GlobalISelEmitter.cpp llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/GlobalISelEmitter.cpp --- llvm-toolchain-snapshot-6.0~svn315865/utils/TableGen/GlobalISelEmitter.cpp 2017-10-15 17:43:17.000000000 +0000 +++ llvm-toolchain-snapshot-6.0~svn316003/utils/TableGen/GlobalISelEmitter.cpp 2017-10-17 14:41:16.000000000 +0000 @@ -103,6 +103,12 @@ OS << "GILLT_v" << Ty.getNumElements() << "s" << Ty.getScalarSizeInBits(); return; } + if (Ty.isPointer()) { + OS << "GILLT_p" << Ty.getAddressSpace(); + if (Ty.getSizeInBits() > 0) + OS << "s" << Ty.getSizeInBits(); + return; + } llvm_unreachable("Unhandled LLT"); } @@ -116,6 +122,11 @@ << Ty.getScalarSizeInBits() << ")"; return; } + if (Ty.isPointer() && Ty.getSizeInBits() > 0) { + OS << "LLT::pointer(" << Ty.getAddressSpace() << ", " + << Ty.getSizeInBits() << ")"; + return; + } llvm_unreachable("Unhandled LLT"); } @@ -152,9 +163,11 @@ /// MVTs that don't map cleanly to an LLT (e.g., iPTR, *any, ...). 
static Optional MVTToLLT(MVT::SimpleValueType SVT) { MVT VT(SVT); + if (VT.isVector() && VT.getVectorNumElements() != 1) return LLTCodeGen( LLT::vector(VT.getVectorNumElements(), VT.getScalarSizeInBits())); + if (VT.isInteger() || VT.isFloatingPoint()) return LLTCodeGen(LLT::scalar(VT.getSizeInBits())); return None; @@ -228,6 +241,11 @@ if (Predicate.isImmediatePattern()) continue; + if (Predicate.isLoad() && Predicate.isUnindexed()) + continue; + + if (Predicate.isNonExtLoad()) + continue; HasUnsupportedPredicate = true; Explanation = Separator + "Has a predicate (" + explainPredicates(N) + ")"; Separator = ", "; @@ -661,6 +679,7 @@ OPM_Int, OPM_LiteralInt, OPM_LLT, + OPM_PointerToAny, OPM_RegBank, OPM_MBB, }; @@ -748,6 +767,37 @@ std::set LLTOperandMatcher::KnownTypes; +/// Generates code to check that an operand is a pointer to any address space. +/// +/// In SelectionDAG, the types did not describe pointers or address spaces. As a +/// result, iN is used to describe a pointer of N bits to any address space and +/// PatFrag predicates are typically used to constrain the address space. There's +/// no reliable means to derive the missing type information from the pattern so +/// imported rules must test the components of a pointer separately. +/// +/// If SizeInBits is zero, then the pointer size will be obtained from the +/// subtarget. 
+class PointerToAnyOperandMatcher : public OperandPredicateMatcher { +protected: + unsigned SizeInBits; + +public: + PointerToAnyOperandMatcher(unsigned SizeInBits) + : OperandPredicateMatcher(OPM_PointerToAny), SizeInBits(SizeInBits) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_PointerToAny; + } + + void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule, + unsigned InsnVarID, unsigned OpIdx) const override { + Table << MatchTable::Opcode("GIM_CheckPointerToAny") << MatchTable::Comment("MI") + << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op") + << MatchTable::IntValue(OpIdx) << MatchTable::Comment("SizeInBits") + << MatchTable::IntValue(SizeInBits) << MatchTable::LineBreak; + } +}; + /// Generates code to check that an operand is a particular target constant. class ComplexPatternOperandMatcher : public OperandPredicateMatcher { protected: @@ -927,6 +977,9 @@ InstructionMatcher &getInstructionMatcher() const { return Insn; } + Error addTypeCheckPredicate(const TypeSetByHwMode &VTy, + bool OperandIsAPointer); + /// Emit MatchTable opcodes to capture instructions into the MIs table. 
void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule, unsigned InsnVarID) const { @@ -1006,6 +1059,27 @@ return static_cast(Predicates.back().get()); } +Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy, + bool OperandIsAPointer) { + if (!VTy.isMachineValueType()) + return failedImport("unsupported typeset"); + + if (VTy.getMachineValueType() == MVT::iPTR && OperandIsAPointer) { + addPredicate(0); + return Error::success(); + } + + auto OpTyOrNone = MVTToLLT(VTy.getMachineValueType().SimpleTy); + if (!OpTyOrNone) + return failedImport("unsupported type"); + + if (OperandIsAPointer) + addPredicate(OpTyOrNone->get().getSizeInBits()); + else + addPredicate(*OpTyOrNone); + return Error::success(); +} + unsigned ComplexPatternOperandMatcher::getAllocatedTemporariesBaseID() const { return Operand.getAllocatedTemporariesBaseID(); } @@ -2070,7 +2144,8 @@ Error importComplexPatternOperandMatcher(OperandMatcher &OM, Record *R, unsigned &TempOpIdx) const; Error importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher, - const TreePatternNode *SrcChild, unsigned OpIdx, + const TreePatternNode *SrcChild, + bool OperandIsAPointer, unsigned OpIdx, unsigned &TempOpIdx) const; Expected createAndImportInstructionRenderer(RuleMatcher &M, const TreePatternNode *Dst, @@ -2164,17 +2239,12 @@ unsigned OpIdx = 0; for (const TypeSetByHwMode &VTy : Src->getExtTypes()) { - auto OpTyOrNone = VTy.isMachineValueType() - ? MVTToLLT(VTy.getMachineValueType().SimpleTy) - : None; - if (!OpTyOrNone) - return failedImport( - "Result of Src pattern operator has an unsupported type"); - // Results don't have a name unless they are the root node. The caller will // set the name if appropriate. 
OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx); - OM.addPredicate(*OpTyOrNone); + if (auto Error = OM.addTypeCheckPredicate(VTy, false /* OperandIsAPointer */)) + return failedImport(toString(std::move(Error)) + + " for result of Src pattern operator"); } for (const auto &Predicate : Src->getPredicateFns()) { @@ -2186,6 +2256,25 @@ continue; } + // No check required. A G_LOAD is an unindexed load. + if (Predicate.isLoad() && Predicate.isUnindexed()) + continue; + + // No check required. G_LOAD by itself is a non-extending load. + if (Predicate.isNonExtLoad()) + continue; + + if (Predicate.isLoad() && Predicate.getMemoryVT() != nullptr) { + Optional MemTyOrNone = + MVTToLLT(getValueType(Predicate.getMemoryVT())); + + if (!MemTyOrNone) + return failedImport("MemVT could not be converted to LLT"); + + InsnMatcher.getOperand(0).addPredicate(MemTyOrNone.getValue()); + continue; + } + return failedImport("Src pattern child has predicate (" + explainPredicates(Src) + ")"); } @@ -2217,6 +2306,13 @@ for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) { TreePatternNode *SrcChild = Src->getChild(i); + // SelectionDAG allows pointers to be represented with iN since it doesn't + // distinguish between pointers and integers but they are different types in GlobalISel. + // Coerce integers to pointers to address space 0 if the context indicates a pointer. + // TODO: Find a better way to do this, SDTCisPtrTy? + bool OperandIsAPointer = + SrcGIOrNull->TheDef->getName() == "G_LOAD" && i == 0; + // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately // following the defs is an intrinsic ID. 
if ((SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" || @@ -2232,8 +2328,9 @@ return failedImport("Expected IntInit containing instrinsic ID)"); } - if (auto Error = importChildMatcher(Rule, InsnMatcher, SrcChild, OpIdx++, - TempOpIdx)) + if (auto Error = + importChildMatcher(Rule, InsnMatcher, SrcChild, OperandIsAPointer, + OpIdx++, TempOpIdx)) return std::move(Error); } } @@ -2256,6 +2353,7 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher, const TreePatternNode *SrcChild, + bool OperandIsAPointer, unsigned OpIdx, unsigned &TempOpIdx) const { OperandMatcher &OM = @@ -2278,12 +2376,10 @@ } } - Optional OpTyOrNone = None; - if (ChildTypes.front().isMachineValueType()) - OpTyOrNone = MVTToLLT(ChildTypes.front().getMachineValueType().SimpleTy); - if (!OpTyOrNone) - return failedImport("Src operand has an unsupported type (" + to_string(*SrcChild) + ")"); - OM.addPredicate(*OpTyOrNone); + if (auto Error = + OM.addTypeCheckPredicate(ChildTypes.front(), OperandIsAPointer)) + return failedImport(toString(std::move(Error)) + " for Src operand (" + + to_string(*SrcChild) + ")"); // Check for nested instructions. 
if (!SrcChild->isLeaf()) { @@ -2889,20 +2985,16 @@ "ComplexRendererFn(" << Target.getName() << "InstructionSelector::*ComplexMatcherMemFn)(MachineOperand &) const;\n" - << "const MatcherInfoTy " + << " const MatcherInfoTy " "MatcherInfo;\n" + << " static " << Target.getName() + << "InstructionSelector::ComplexMatcherMemFn ComplexPredicateFns[];\n" << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n\n"; OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n" << ", State(" << MaxTemporaries << "),\n" << "MatcherInfo({TypeObjects, FeatureBitsets, I64ImmPredicateFns, " - "APIntImmPredicateFns, APFloatImmPredicateFns, {\n" - << " nullptr, // GICP_Invalid\n"; - for (const auto &Record : ComplexPredicates) - OS << " &" << Target.getName() - << "InstructionSelector::" << Record->getValueAsString("MatcherFn") - << ", // " << Record->getName() << "\n"; - OS << "}})\n" + "APIntImmPredicateFns, APFloatImmPredicateFns, ComplexPredicateFns})\n" << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n\n"; OS << "#ifdef GET_GLOBALISEL_IMPL\n"; @@ -3021,6 +3113,16 @@ emitImmPredicates(OS, "APInt", "const APInt &", [](const Record *R) { return R->getValueAsBit("IsAPInt"); }); + OS << "\n"; + + OS << Target.getName() << "InstructionSelector::ComplexMatcherMemFn\n" + << Target.getName() << "InstructionSelector::ComplexPredicateFns[] = {\n" + << " nullptr, // GICP_Invalid\n"; + for (const auto &Record : ComplexPredicates) + OS << " &" << Target.getName() + << "InstructionSelector::" << Record->getValueAsString("MatcherFn") + << ", // " << Record->getName() << "\n"; + OS << "};\n\n"; OS << "bool " << Target.getName() << "InstructionSelector::selectImpl(MachineInstr &I) const {\n"