diff -Nru llvm-toolchain-12-12.0.0/clang/docs/ReleaseNotes.rst llvm-toolchain-12-12.0.1/clang/docs/ReleaseNotes.rst --- llvm-toolchain-12-12.0.0/clang/docs/ReleaseNotes.rst 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/docs/ReleaseNotes.rst 2021-07-09 07:09:47.000000000 +0000 @@ -389,6 +389,9 @@ ``AlignConsecutiveDeclarations`` and ``AlignConsecutiveMacros`` have been modified to allow alignment across empty lines and/or comments. +- Support for Whitesmiths has been improved, with fixes for ``namespace`` blocks + and ``case`` blocks and labels. + libclang -------- diff -Nru llvm-toolchain-12-12.0.0/clang/lib/Basic/Targets/PPC.cpp llvm-toolchain-12-12.0.1/clang/lib/Basic/Targets/PPC.cpp --- llvm-toolchain-12-12.0.0/clang/lib/Basic/Targets/PPC.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/Basic/Targets/PPC.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -57,6 +57,7 @@ } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; } else if (Feature == "+spe" || Feature == "+efpu2") { + HasStrictFP = false; HasSPE = true; LongDoubleWidth = LongDoubleAlign = 64; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); diff -Nru llvm-toolchain-12-12.0.0/clang/lib/Basic/Targets/X86.cpp llvm-toolchain-12-12.0.1/clang/lib/Basic/Targets/X86.cpp --- llvm-toolchain-12-12.0.0/clang/lib/Basic/Targets/X86.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/Basic/Targets/X86.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -513,10 +513,11 @@ case CK_K8: case CK_K8SSE3: case CK_x86_64: + defineCPUMacros(Builder, "k8"); + break; case CK_x86_64_v2: case CK_x86_64_v3: case CK_x86_64_v4: - defineCPUMacros(Builder, "k8"); break; case CK_AMDFAM10: defineCPUMacros(Builder, "amdfam10"); diff -Nru llvm-toolchain-12-12.0.0/clang/lib/CodeGen/CGExprAgg.cpp llvm-toolchain-12-12.0.1/clang/lib/CodeGen/CGExprAgg.cpp --- llvm-toolchain-12-12.0.0/clang/lib/CodeGen/CGExprAgg.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/CodeGen/CGExprAgg.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -2056,7 +2056,7 @@ Record->hasTrivialCopyAssignment() || Record->hasTrivialMoveConstructor() || Record->hasTrivialMoveAssignment() || - Record->isUnion()) && + Record->hasAttr() || Record->isUnion()) && "Trying to aggregate-copy a type without a trivial copy/move " "constructor or assignment operator"); // Ignore empty classes in C++. diff -Nru llvm-toolchain-12-12.0.0/clang/lib/CodeGen/CGOpenMPRuntime.cpp llvm-toolchain-12-12.0.1/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- llvm-toolchain-12-12.0.0/clang/lib/CodeGen/CGOpenMPRuntime.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/CodeGen/CGOpenMPRuntime.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -409,6 +409,7 @@ llvm::DenseMap LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; const CodeGen::CGBlockInfo *BlockInfo = nullptr; + bool NoInheritance = false; public: /// Constructs region for combined constructs. @@ -416,16 +417,19 @@ /// a list of functions used for code generation of implicitly inlined /// regions. InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) - : CGF(CGF) { + OpenMPDirectiveKind Kind, bool HasCancel, + bool NoInheritance = true) + : CGF(CGF), NoInheritance(NoInheritance) { // Start emission for the construct. 
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); - std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); - LambdaThisCaptureField = CGF.LambdaThisCaptureField; - CGF.LambdaThisCaptureField = nullptr; - BlockInfo = CGF.BlockInfo; - CGF.BlockInfo = nullptr; + if (NoInheritance) { + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + LambdaThisCaptureField = CGF.LambdaThisCaptureField; + CGF.LambdaThisCaptureField = nullptr; + BlockInfo = CGF.BlockInfo; + CGF.BlockInfo = nullptr; + } } ~InlinedOpenMPRegionRAII() { @@ -434,9 +438,11 @@ cast(CGF.CapturedStmtInfo)->getOldCSI(); delete CGF.CapturedStmtInfo; CGF.CapturedStmtInfo = OldCSI; - std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); - CGF.LambdaThisCaptureField = LambdaThisCaptureField; - CGF.BlockInfo = BlockInfo; + if (NoInheritance) { + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + CGF.LambdaThisCaptureField = LambdaThisCaptureField; + CGF.BlockInfo = BlockInfo; + } } }; @@ -3853,7 +3859,7 @@ // Processing for implicitly captured variables. InlinedOpenMPRegionRAII Region( CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, - /*HasCancel=*/false); + /*HasCancel=*/false, /*NoInheritance=*/true); SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); } if (Type->isArrayType()) { @@ -6214,7 +6220,9 @@ bool HasCancel) { if (!CGF.HaveInsertPoint()) return; - InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); + InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, + InnerKind != OMPD_critical && + InnerKind != OMPD_master); CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); } diff -Nru llvm-toolchain-12-12.0.0/clang/lib/CodeGen/CodeGenModule.cpp llvm-toolchain-12-12.0.1/clang/lib/CodeGen/CodeGenModule.cpp --- llvm-toolchain-12-12.0.0/clang/lib/CodeGen/CodeGenModule.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/CodeGen/CodeGenModule.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -6215,15 +6215,17 @@ return *SanStats; } + llvm::Value * CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF) { llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType()); - auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); - auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); - return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy, - "__translate_sampler_initializer"), - {C}); + auto *SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); + auto *FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); + auto *Call = CGF.Builder.CreateCall( + CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C}); + Call->setCallingConv(Call->getCalledFunction()->getCallingConv()); + return Call; } CharUnits CodeGenModule::getNaturalPointeeTypeAlignment( diff -Nru llvm-toolchain-12-12.0.0/clang/lib/Format/TokenAnnotator.cpp llvm-toolchain-12-12.0.1/clang/lib/Format/TokenAnnotator.cpp --- llvm-toolchain-12-12.0.0/clang/lib/Format/TokenAnnotator.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/Format/TokenAnnotator.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -1917,12 +1917,12 @@ if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) return true; - if (Tok.Next->is(tok::l_paren) && - !(Tok.Previous && Tok.Previous->is(tok::identifier) && - Tok.Previous->Previous && - Tok.Previous->Previous->isOneOf(tok::arrowstar, tok::arrow, - tok::star))) - return true; + // Look for a cast 
`( x ) (`. + if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) { + if (Tok.Previous->is(tok::identifier) && + Tok.Previous->Previous->is(tok::l_paren)) + return true; + } if (!Tok.Next->Next) return false; diff -Nru llvm-toolchain-12-12.0.0/clang/lib/Format/UnwrappedLineFormatter.cpp llvm-toolchain-12-12.0.1/clang/lib/Format/UnwrappedLineFormatter.cpp --- llvm-toolchain-12-12.0.0/clang/lib/Format/UnwrappedLineFormatter.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/Format/UnwrappedLineFormatter.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -1281,13 +1281,6 @@ if (Newlines) Indent = NewlineIndent; - // If in Whitemsmiths mode, indent start and end of blocks - if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { - if (RootToken.isOneOf(tok::l_brace, tok::r_brace, tok::kw_case, - tok::kw_default)) - Indent += Style.IndentWidth; - } - // Preprocessor directives get indented before the hash only if specified if (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && (Line.Type == LT_PreprocessorDirective || diff -Nru llvm-toolchain-12-12.0.0/clang/lib/Format/UnwrappedLineParser.cpp llvm-toolchain-12-12.0.1/clang/lib/Format/UnwrappedLineParser.cpp --- llvm-toolchain-12-12.0.0/clang/lib/Format/UnwrappedLineParser.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/Format/UnwrappedLineParser.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -579,17 +579,23 @@ return h; } -void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, - bool MunchSemi) { +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, + bool MunchSemi, + bool UnindentWhitesmithsBraces) { assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && "'{' or macro block token expected"); const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); FormatTok->setBlockKind(BK_Block); + // For Whitesmiths mode, jump to the next level prior to skipping over the + // braces. + if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) + ++Line->Level; + size_t PPStartHash = computePPHash(); unsigned InitialLevel = Line->Level; - nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); + nextToken(/*LevelDifference=*/AddLevels); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -602,10 +608,16 @@ ? (UnwrappedLine::kInvalidIndex) : (CurrentLines->size() - 1 - NbPreprocessorDirectives); + // Whitesmiths is weird here. The brace needs to be indented for the namespace + // block, but the block itself may not be indented depending on the style + // settings. This allows the format to back up one level in those cases. + if (UnindentWhitesmithsBraces) + --Line->Level; + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); - if (AddLevel) - ++Line->Level; + if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) + Line->Level += AddLevels; parseLevel(/*HasOpeningBrace=*/true); if (eof()) @@ -621,7 +633,7 @@ size_t PPEndHash = computePPHash(); // Munch the closing brace. - nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); + nextToken(/*LevelDifference=*/-AddLevels); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -637,6 +649,7 @@ nextToken(); Line->Level = InitialLevel; + FormatTok->setBlockKind(BK_Block); if (PPStartHash == PPEndHash) { Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; @@ -2128,15 +2141,34 @@ if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); - bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || - (Style.NamespaceIndentation == FormatStyle::NI_Inner && - DeclarationScopeStack.size() > 1); - parseBlock(/*MustBeDeclaration=*/true, AddLevel); + unsigned AddLevels = + Style.NamespaceIndentation == FormatStyle::NI_All || + (Style.NamespaceIndentation == FormatStyle::NI_Inner && + DeclarationScopeStack.size() > 1) + ? 1u + : 0u; + bool ManageWhitesmithsBraces = + AddLevels == 0u && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; + + // If we're in Whitesmiths mode, indent the brace if we're not indenting + // the whole block. + if (ManageWhitesmithsBraces) + ++Line->Level; + + parseBlock(/*MustBeDeclaration=*/true, AddLevels, + /*MunchSemi=*/true, + /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); + // Munch the semicolon after a namespace. This is more common than one would // think. Putting the semicolon into its own line is very ugly. if (FormatTok->Tok.is(tok::semi)) nextToken(); - addUnwrappedLine(); + + addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); + + if (ManageWhitesmithsBraces) + --Line->Level; } // FIXME: Add error handling. } @@ -2222,6 +2254,11 @@ return; } + // If in Whitesmiths mode, the line with the while() needs to be indented + // to the same level as the block. + if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) + ++Line->Level; + nextToken(); parseStructuralElement(); } @@ -2234,25 +2271,19 @@ if (LeftAlignLabel) Line->Level = 0; - bool RemoveWhitesmithsCaseIndent = - (!Style.IndentCaseBlocks && - Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths); - - if (RemoveWhitesmithsCaseIndent) - --Line->Level; - if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter( - this, Line->Level, Style.BraceWrapping.AfterCaseLabel, - Style.BraceWrapping.IndentBraces || RemoveWhitesmithsCaseIndent); + CompoundStatementIndenter Indenter(this, Line->Level, + Style.BraceWrapping.AfterCaseLabel, + Style.BraceWrapping.IndentBraces); parseBlock(/*MustBeDeclaration=*/false); if (FormatTok->Tok.is(tok::kw_break)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); - if (RemoveWhitesmithsCaseIndent) { + if (!Style.IndentCaseBlocks && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { Line->Level++; } } @@ -2920,17 +2951,29 @@ llvm::dbgs() << "\n"; } -void UnwrappedLineParser::addUnwrappedLine() { +void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { if (Line->Tokens.empty()) return; LLVM_DEBUG({ if (CurrentLines == &Lines) printDebugInfo(*Line); }); + + // If this line closes a block when in Whitesmiths mode, remember that + // information so that the level can be decreased after the line is added. + // This has to happen after the addition of the line since the line itself + // needs to be indented. 
+ bool ClosesWhitesmithsBlock = + Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; + CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; + + if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) + --Line->Level; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), diff -Nru llvm-toolchain-12-12.0.0/clang/lib/Format/UnwrappedLineParser.h llvm-toolchain-12-12.0.1/clang/lib/Format/UnwrappedLineParser.h --- llvm-toolchain-12-12.0.0/clang/lib/Format/UnwrappedLineParser.h 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/Format/UnwrappedLineParser.h 2021-07-09 07:09:47.000000000 +0000 @@ -85,8 +85,9 @@ void reset(); void parseFile(); void parseLevel(bool HasOpeningBrace); - void parseBlock(bool MustBeDeclaration, bool AddLevel = true, - bool MunchSemi = true); + void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1u, + bool MunchSemi = true, + bool UnindentWhitesmithsBraces = false); void parseChildBlock(); void parsePPDirective(); void parsePPDefine(); @@ -140,7 +141,12 @@ bool tryToParsePropertyAccessor(); void tryToParseJSFunction(); bool tryToParseSimpleAttribute(); - void addUnwrappedLine(); + + // Used by addUnwrappedLine to denote whether to keep or remove a level + // when resetting the line state. + enum class LineLevel { Remove, Keep }; + + void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove); bool eof() const; // LevelDifference is the difference of levels after and before the current // token. For example: diff -Nru llvm-toolchain-12-12.0.0/clang/lib/Headers/ppc_wrappers/xmmintrin.h llvm-toolchain-12-12.0.1/clang/lib/Headers/ppc_wrappers/xmmintrin.h --- llvm-toolchain-12-12.0.0/clang/lib/Headers/ppc_wrappers/xmmintrin.h 2020-10-16 21:13:07.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/Headers/ppc_wrappers/xmmintrin.h 2021-07-09 07:04:57.000000000 +0000 @@ -28,7 +28,7 @@ Most SSE scalar float intrinsic operations can be performed more efficiently as C language float scalar operations or optimized to use vector SIMD operations. We recommend this for new applications. */ -#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." +#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." #endif #ifndef _XMMINTRIN_H_INCLUDED @@ -62,14 +62,13 @@ /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ -typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef vector float __m128 __attribute__((__may_alias__)); /* Unaligned version of the same type. */ -typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, - __aligned__ (1))); +typedef vector float __m128_u __attribute__((__may_alias__, __aligned__(1))); /* Internal data types for implementing the intrinsics. */ -typedef float __v4sf __attribute__ ((__vector_size__ (16))); +typedef vector float __v4sf; /* Create an undefined vector. 
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff -Nru llvm-toolchain-12-12.0.0/clang/lib/Sema/SemaChecking.cpp llvm-toolchain-12-12.0.1/clang/lib/Sema/SemaChecking.cpp --- llvm-toolchain-12-12.0.0/clang/lib/Sema/SemaChecking.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/lib/Sema/SemaChecking.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -2623,7 +2623,10 @@ return false; const auto *CE = dyn_cast(UO->getSubExpr()); - if (!CE || CE->getCastKind() != CK_IntegralToPointer) + if (!CE) + return false; + if (CE->getCastKind() != CK_IntegralToPointer && + CE->getCastKind() != CK_NullToPointer) return false; // The integer must be from an EnumConstantDecl. diff -Nru llvm-toolchain-12-12.0.0/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c llvm-toolchain-12-12.0.1/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c --- llvm-toolchain-12-12.0.0/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c 2021-07-09 07:04:58.000000000 +0000 @@ -4,10 +4,11 @@ #define _(x, y) (__builtin_preserve_enum_value((x), (y))) enum AA { + VAL0 = 0, VAL1 = 2, VAL2 = 0xffffffff80000000UL, }; -typedef enum { VAL10 = -2, VAL11 = 0xffff8000, } __BB; +typedef enum { VAL00, VAL10 = -2, VAL11 = 0xffff8000, } __BB; unsigned unit1() { return _(*(enum AA *)VAL1, 0) + _(*(__BB *)VAL10, 1); @@ -17,10 +18,16 @@ return _(*(enum AA *)VAL2, 0) + _(*(__BB *)VAL11, 1); } +unsigned unit3() { + return _(*(enum AA *)VAL0, 0) + _(*(__BB *)VAL00, 1); +} + // CHECK: @0 = private unnamed_addr constant [7 x i8] c"VAL1:2\00", align 1 // CHECK: @1 = private unnamed_addr constant [9 x i8] c"VAL10:-2\00", align 1 // CHECK: @2 = private unnamed_addr constant [17 x i8] c"VAL2:-2147483648\00", align 1 // CHECK: @3 = private unnamed_addr constant [17 x i8] c"VAL11:4294934528\00", align 1 +// CHECK: @4 = private unnamed_addr constant [7 x i8] c"VAL0:0\00", align 1 +// CHECK: @5 = private unnamed_addr constant [8 x i8] c"VAL00:0\00", align 1 // CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 0, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @0, i32 0, i32 0), i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA:[0-9]+]] // CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 1, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @1, i32 0, i32 0), i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_ENUM:[0-9]+]] @@ -28,5 +35,8 @@ // CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 2, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @2, i32 0, i32 0), i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA]] // CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 3, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @3, i32 0, i32 0), i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_ENUM]] +// CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 4, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @4, i32 0, i32 0), i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA]] +// CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 5, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @5, i32 0, i32 0), i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_ENUM]] + // CHECK: ![[ENUM_AA]] = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AA" // CHECK: ![[TYPEDEF_ENUM]] = !DIDerivedType(tag: DW_TAG_typedef, name: "__BB" diff -Nru 
llvm-toolchain-12-12.0.0/clang/test/CodeGen/builtins-ppc-fpconstrained.c llvm-toolchain-12-12.0.1/clang/test/CodeGen/builtins-ppc-fpconstrained.c --- llvm-toolchain-12-12.0.0/clang/test/CodeGen/builtins-ppc-fpconstrained.c 2021-02-17 08:14:28.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/test/CodeGen/builtins-ppc-fpconstrained.c 2021-07-09 07:04:58.000000000 +0000 @@ -11,6 +11,9 @@ // RUN: -fallow-half-arguments-and-returns -S -ffp-exception-behavior=strict \ // RUN: -o - %s | FileCheck --check-prefix=CHECK-ASM \ // RUN: --check-prefix=FIXME-CHECK %s +// RUN: %clang_cc1 -triple powerpcspe -S -ffp-exception-behavior=strict \ +// RUN: -target-feature +spe -fexperimental-strict-floating-point -emit-llvm \ +// RUN: %s -o - | FileCheck --check-prefix=CHECK-CONSTRAINED %s typedef __attribute__((vector_size(4 * sizeof(float)))) float vec_float; typedef __attribute__((vector_size(2 * sizeof(double)))) double vec_double; diff -Nru llvm-toolchain-12-12.0.0/clang/test/CodeGen/ppc-xmmintrin.c llvm-toolchain-12-12.0.1/clang/test/CodeGen/ppc-xmmintrin.c --- llvm-toolchain-12-12.0.0/clang/test/CodeGen/ppc-xmmintrin.c 2020-10-16 21:13:07.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/test/CodeGen/ppc-xmmintrin.c 2021-07-09 07:04:58.000000000 +0000 @@ -3,8 +3,12 @@ // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE +// RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns #include @@ -1426,7 +1430,7 @@ void __attribute__((noinline)) test_prefetch() { - _mm_prefetch(ms, i); + _mm_prefetch(ms, _MM_HINT_NTA); } // CHECK-LABEL: @test_prefetch diff -Nru llvm-toolchain-12-12.0.0/clang/test/CodeGenCXX/trivial_abi.cpp llvm-toolchain-12-12.0.1/clang/test/CodeGenCXX/trivial_abi.cpp --- llvm-toolchain-12-12.0.0/clang/test/CodeGenCXX/trivial_abi.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/test/CodeGenCXX/trivial_abi.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -262,3 +262,21 @@ void testExceptionLarge() { calleeExceptionLarge(Large(), Large()); } + +// PR42961 + +// CHECK: define{{.*}} @"_ZN3$_08__invokeEv"() +// CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_SMALL]], align 8 +// CHECK: %[[COERCE:.*]] = alloca %[[STRUCT_SMALL]], align 8 +// CHECK: %[[CALL:.*]] = call{{.*}} @"_ZNK3$_0clEv" +// CHECK: %[[COERCEDIVE:.*]] = getelementptr{{.*}} %[[COERCE]] +// CHECK: %[[COERCEVALIP:.*]] = inttoptr{{.*}} %[[CALL]] +// CHECK: %[[RETVALP:.*]] = bitcast %[[STRUCT_SMALL]]* %[[RETVAL]] +// CHECK: %[[COERCEP:.*]] = bitcast %[[STRUCT_SMALL]]* %[[COERCE]] +// CHECK: call {{.*}}memcpy{{.*}} %[[RETVALP]]{{.*}} %[[COERCEP]] +// CHECK: %[[COERCEDIVE1:.*]] = getelementptr{{.*}} %[[RETVAL]] +// CHECK: %[[TMP:.*]] = load{{.*}} %[[COERCEDIVE1]] +// CHECK: %[[COERCEVALPI:.*]] = ptrtoint{{.*}} %[[TMP]] +// CHECK: ret{{.*}} %[[COERCEVALPI]] + 
+Small (*fp)() = []() -> Small { return Small(); }; diff -Nru llvm-toolchain-12-12.0.0/clang/test/CodeGenOpenCL/sampler.cl llvm-toolchain-12-12.0.1/clang/test/CodeGenOpenCL/sampler.cl --- llvm-toolchain-12-12.0.0/clang/test/CodeGenOpenCL/sampler.cl 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/test/CodeGenOpenCL/sampler.cl 2021-07-09 07:04:58.000000000 +0000 @@ -39,7 +39,7 @@ // Case 2b sampler_t smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_NEAREST; // CHECK: [[smp_ptr:%[A-Za-z0-9_\.]+]] = alloca %opencl.sampler_t addrspace(2)* - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19) // CHECK: store %opencl.sampler_t addrspace(2)* [[SAMP]], %opencl.sampler_t addrspace(2)** [[smp_ptr]] // Case 1b @@ -56,12 +56,12 @@ // Case 1a/2a fnc4smp(glb_smp); - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // Case 1a/2c fnc4smp(glb_smp_const); - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // Case 1c @@ -70,12 +70,12 @@ // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) fnc4smp(5); - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5) // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) const sampler_t const_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; fnc4smp(const_smp); - // CHECK: [[CONST_SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: [[CONST_SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: store %opencl.sampler_t addrspace(2)* [[CONST_SAMP]], %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR:%[a-zA-Z0-9]+]] fnc4smp(const_smp); // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR]] @@ -83,7 +83,7 @@ constant sampler_t constant_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; fnc4smp(constant_smp); - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // TODO: enable sampler initialization with non-constant integer. 
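For reference, the trivial_abi.cpp addition above (PR42961) exercises the pattern sketched below: a type carrying the trivial_abi attribute, with user-provided copy constructor and destructor, returned from a captureless lambda that is converted to a plain function pointer. Emitting the lambda's static invoker copies the aggregate return value, which the relaxed assertion in CGExprAgg.cpp earlier in this diff now also permits for such types. The names here are illustrative, not the test's own definitions.

struct __attribute__((trivial_abi)) Handle {
  Handle() = default;
  Handle(const Handle &other) : p(other.p) {} // user-provided, non-trivial copy
  ~Handle() {}                                // user-provided destructor
  int *p = nullptr;
};

// Captureless lambda decays to an ordinary function pointer; the generated
// __invoke thunk forwards the trivial_abi return value by value.
Handle (*make)() = []() -> Handle { return Handle(); };

int main() {
  Handle h = make();
  return h.p == nullptr ? 0 : 1;
}
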
diff -Nru llvm-toolchain-12-12.0.0/clang/test/OpenMP/critical_codegen.cpp llvm-toolchain-12-12.0.1/clang/test/OpenMP/critical_codegen.cpp --- llvm-toolchain-12-12.0.0/clang/test/OpenMP/critical_codegen.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/test/OpenMP/critical_codegen.cpp 2021-07-09 07:04:58.000000000 +0000 @@ -68,6 +68,31 @@ return a; } +// ALL-LABEL: lambda_critical +// TERM_DEBUG-LABEL: lambda_critical +void lambda_critical(int a, int b) { + auto l = [=]() { +#pragma omp critical + { + // ALL: call void @__kmpc_critical( + int c = a + b; + } + }; + + l(); + + auto l1 = [=]() { +#pragma omp parallel +#pragma omp critical + { + // ALL: call void @__kmpc_critical( + int c = a + b; + } + }; + + l1(); +} + struct S { int a; }; diff -Nru llvm-toolchain-12-12.0.0/clang/test/OpenMP/master_codegen.cpp llvm-toolchain-12-12.0.1/clang/test/OpenMP/master_codegen.cpp --- llvm-toolchain-12-12.0.0/clang/test/OpenMP/master_codegen.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/test/OpenMP/master_codegen.cpp 2021-07-09 07:04:58.000000000 +0000 @@ -55,6 +55,41 @@ return a; } +// ALL-LABEL: lambda_master +// TERM_DEBUG-LABEL: lambda_master +void lambda_master(int a, int b) { + auto l = [=]() { +#pragma omp master + { + // ALL: call i32 @__kmpc_master( + int c = a + b; + } + }; + + l(); + + auto l1 = [=]() { +#pragma omp parallel +#pragma omp master + { + // ALL: call i32 @__kmpc_master( + int c = a + b; + } + }; + + l1(); + + auto l2 = [=]() { +#pragma omp parallel master + { + // ALL: call i32 @__kmpc_master( + int c = a + b; + } + }; + + l2(); +} + // ALL-LABEL: parallel_master // TERM_DEBUG-LABEL: parallel_master void parallel_master() { diff -Nru llvm-toolchain-12-12.0.0/clang/tools/scan-view/CMakeLists.txt llvm-toolchain-12-12.0.1/clang/tools/scan-view/CMakeLists.txt --- llvm-toolchain-12-12.0.0/clang/tools/scan-view/CMakeLists.txt 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/tools/scan-view/CMakeLists.txt 2021-07-09 07:04:58.000000000 +0000 @@ -5,6 +5,7 @@ set(ShareFiles ScanView.py + Reporter.py startfile.py bugcatcher.ico) diff -Nru llvm-toolchain-12-12.0.0/clang/tools/scan-view/share/Reporter.py llvm-toolchain-12-12.0.1/clang/tools/scan-view/share/Reporter.py --- llvm-toolchain-12-12.0.0/clang/tools/scan-view/share/Reporter.py 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/tools/scan-view/share/Reporter.py 2021-07-09 07:04:58.000000000 +0000 @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Methods for reporting bugs.""" + +import subprocess, sys, os + +__all__ = ['ReportFailure', 'BugReport', 'getReporters'] + +# + +class ReportFailure(Exception): + """Generic exception for failures in bug reporting.""" + def __init__(self, value): + self.value = value + +# Collect information about a bug. + +class BugReport(object): + def __init__(self, title, description, files): + self.title = title + self.description = description + self.files = files + +# Reporter interfaces. 
+ +import os + +import email, mimetypes, smtplib +from email import encoders +from email.message import Message +from email.mime.base import MIMEBase +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText + +#===------------------------------------------------------------------------===# +# ReporterParameter +#===------------------------------------------------------------------------===# + +class ReporterParameter(object): + def __init__(self, n): + self.name = n + def getName(self): + return self.name + def getValue(self,r,bugtype,getConfigOption): + return getConfigOption(r.getName(),self.getName()) + def saveConfigValue(self): + return True + +class TextParameter (ReporterParameter): + def getHTML(self,r,bugtype,getConfigOption): + return """\ + +%s: + +"""%(self.getName(),r.getName(),self.getName(),self.getValue(r,bugtype,getConfigOption)) + +class SelectionParameter (ReporterParameter): + def __init__(self, n, values): + ReporterParameter.__init__(self,n) + self.values = values + + def getHTML(self,r,bugtype,getConfigOption): + default = self.getValue(r,bugtype,getConfigOption) + return """\ + +%s:"""%(self.getName(),r.getName(),self.getName(),'\n'.join(["""\ +"""%(o[0], + o[0] == default and ' selected="selected"' or '', + o[1]) for o in self.values])) + +#===------------------------------------------------------------------------===# +# Reporters +#===------------------------------------------------------------------------===# + +class EmailReporter(object): + def getName(self): + return 'Email' + + def getParameters(self): + return [TextParameter(x) for x in ['To', 'From', 'SMTP Server', 'SMTP Port']] + + # Lifted from python email module examples. + def attachFile(self, outer, path): + # Guess the content type based on the file's extension. Encoding + # will be ignored, although we should check for simple things like + # gzip'd or compressed files. + ctype, encoding = mimetypes.guess_type(path) + if ctype is None or encoding is not None: + # No guess could be made, or the file is encoded (compressed), so + # use a generic bag-of-bits type. 
+ ctype = 'application/octet-stream' + maintype, subtype = ctype.split('/', 1) + if maintype == 'text': + fp = open(path) + # Note: we should handle calculating the charset + msg = MIMEText(fp.read(), _subtype=subtype) + fp.close() + else: + fp = open(path, 'rb') + msg = MIMEBase(maintype, subtype) + msg.set_payload(fp.read()) + fp.close() + # Encode the payload using Base64 + encoders.encode_base64(msg) + # Set the filename parameter + msg.add_header('Content-Disposition', 'attachment', filename=os.path.basename(path)) + outer.attach(msg) + + def fileReport(self, report, parameters): + mainMsg = """\ +BUG REPORT +--- +Title: %s +Description: %s +"""%(report.title, report.description) + + if not parameters.get('To'): + raise ReportFailure('No "To" address specified.') + if not parameters.get('From'): + raise ReportFailure('No "From" address specified.') + + msg = MIMEMultipart() + msg['Subject'] = 'BUG REPORT: %s'%(report.title) + # FIXME: Get config parameters + msg['To'] = parameters.get('To') + msg['From'] = parameters.get('From') + msg.preamble = mainMsg + + msg.attach(MIMEText(mainMsg, _subtype='text/plain')) + for file in report.files: + self.attachFile(msg, file) + + try: + s = smtplib.SMTP(host=parameters.get('SMTP Server'), + port=parameters.get('SMTP Port')) + s.sendmail(msg['From'], msg['To'], msg.as_string()) + s.close() + except: + raise ReportFailure('Unable to send message via SMTP.') + + return "Message sent!" + +class BugzillaReporter(object): + def getName(self): + return 'Bugzilla' + + def getParameters(self): + return [TextParameter(x) for x in ['URL','Product']] + + def fileReport(self, report, parameters): + raise NotImplementedError + + +class RadarClassificationParameter(SelectionParameter): + def __init__(self): + SelectionParameter.__init__(self,"Classification", + [['1', 'Security'], ['2', 'Crash/Hang/Data Loss'], + ['3', 'Performance'], ['4', 'UI/Usability'], + ['6', 'Serious Bug'], ['7', 'Other']]) + + def saveConfigValue(self): + return False + + def getValue(self,r,bugtype,getConfigOption): + if bugtype.find("leak") != -1: + return '3' + elif bugtype.find("dereference") != -1: + return '2' + elif bugtype.find("missing ivar release") != -1: + return '3' + else: + return '7' + +### + +def getReporters(): + reporters = [] + reporters.append(EmailReporter()) + return reporters + diff -Nru llvm-toolchain-12-12.0.0/clang/unittests/Format/FormatTest.cpp llvm-toolchain-12-12.0.1/clang/unittests/Format/FormatTest.cpp --- llvm-toolchain-12-12.0.0/clang/unittests/Format/FormatTest.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang/unittests/Format/FormatTest.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -12368,6 +12368,17 @@ verifyFormat("size_t idx = (a->*foo)(a - 1);", Spaces); verifyFormat("size_t idx = (a->foo)(a - 1);", Spaces); verifyFormat("size_t idx = (*foo)(a - 1);", Spaces); + verifyFormat("size_t idx = (*(foo))(a - 1);", Spaces); + Spaces.ColumnLimit = 80; + Spaces.IndentWidth = 4; + Spaces.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + verifyFormat("void foo( ) {\n" + " size_t foo = (*(function))(\n" + " Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, " + "BarrrrrrrrrrrrLong,\n" + " FoooooooooLooooong);\n" + "}", + Spaces); Spaces.SpaceAfterCStyleCast = false; verifyFormat("size_t idx = (size_t)(ptr - ((char *)file));", Spaces); verifyFormat("size_t idx = (size_t)a;", Spaces); @@ -12375,6 +12386,15 @@ verifyFormat("size_t idx = (a->*foo)(a - 1);", Spaces); verifyFormat("size_t idx = (a->foo)(a - 1);", Spaces); 
verifyFormat("size_t idx = (*foo)(a - 1);", Spaces); + verifyFormat("size_t idx = (*(foo))(a - 1);", Spaces); + + verifyFormat("void foo( ) {\n" + " size_t foo = (*(function))(\n" + " Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, " + "BarrrrrrrrrrrrLong,\n" + " FoooooooooLooooong);\n" + "}", + Spaces); } TEST_F(FormatTest, ConfigurableSpacesInSquareBrackets) { @@ -14721,6 +14741,7 @@ WhitesmithsBraceStyle); */ + WhitesmithsBraceStyle.NamespaceIndentation = FormatStyle::NI_None; verifyFormat("namespace a\n" " {\n" "class A\n" @@ -14745,6 +14766,89 @@ " } // namespace a", WhitesmithsBraceStyle); + verifyFormat("namespace a\n" + " {\n" + "namespace b\n" + " {\n" + "class A\n" + " {\n" + " void f()\n" + " {\n" + " if (true)\n" + " {\n" + " a();\n" + " b();\n" + " }\n" + " }\n" + " void g()\n" + " {\n" + " return;\n" + " }\n" + " };\n" + "struct B\n" + " {\n" + " int x;\n" + " };\n" + " } // namespace b\n" + " } // namespace a", + WhitesmithsBraceStyle); + + WhitesmithsBraceStyle.NamespaceIndentation = FormatStyle::NI_Inner; + verifyFormat("namespace a\n" + " {\n" + "namespace b\n" + " {\n" + " class A\n" + " {\n" + " void f()\n" + " {\n" + " if (true)\n" + " {\n" + " a();\n" + " b();\n" + " }\n" + " }\n" + " void g()\n" + " {\n" + " return;\n" + " }\n" + " };\n" + " struct B\n" + " {\n" + " int x;\n" + " };\n" + " } // namespace b\n" + " } // namespace a", + WhitesmithsBraceStyle); + + WhitesmithsBraceStyle.NamespaceIndentation = FormatStyle::NI_All; + verifyFormat("namespace a\n" + " {\n" + " namespace b\n" + " {\n" + " class A\n" + " {\n" + " void f()\n" + " {\n" + " if (true)\n" + " {\n" + " a();\n" + " b();\n" + " }\n" + " }\n" + " void g()\n" + " {\n" + " return;\n" + " }\n" + " };\n" + " struct B\n" + " {\n" + " int x;\n" + " };\n" + " } // namespace b\n" + " } // namespace a", + WhitesmithsBraceStyle); + verifyFormat("void f()\n" " {\n" " if (true)\n" @@ -14779,7 +14883,7 @@ " }\n", WhitesmithsBraceStyle); - WhitesmithsBraceStyle.IndentCaseBlocks = true; + WhitesmithsBraceStyle.IndentCaseLabels = true; verifyFormat("void switchTest1(int a)\n" " {\n" " switch (a)\n" @@ -14787,7 +14891,7 @@ " case 2:\n" " {\n" " }\n" - " break;\n" + " break;\n" " }\n" " }\n", WhitesmithsBraceStyle); @@ -14797,7 +14901,7 @@ " switch (a)\n" " {\n" " case 0:\n" - " break;\n" + " break;\n" " case 1:\n" " {\n" " break;\n" @@ -14805,9 +14909,9 @@ " case 2:\n" " {\n" " }\n" - " break;\n" + " break;\n" " default:\n" - " break;\n" + " break;\n" " }\n" " }\n", WhitesmithsBraceStyle); @@ -14820,17 +14924,17 @@ " {\n" " foo(x);\n" " }\n" - " break;\n" + " break;\n" " default:\n" " {\n" " foo(1);\n" " }\n" - " break;\n" + " break;\n" " }\n" " }\n", WhitesmithsBraceStyle); - WhitesmithsBraceStyle.IndentCaseBlocks = false; + WhitesmithsBraceStyle.IndentCaseLabels = false; verifyFormat("void switchTest4(int a)\n" " {\n" diff -Nru llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/ConfigCompile.cpp llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/ConfigCompile.cpp --- llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/ConfigCompile.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/ConfigCompile.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -101,6 +101,7 @@ llvm::SourceMgr *SourceMgr; // Normalized Fragment::SourceInfo::Directory. 
std::string FragmentDirectory; + bool Trusted = false; llvm::Optional compileRegex(const Located &Text, @@ -183,6 +184,7 @@ } void compile(Fragment &&F) { + Trusted = F.Source.Trusted; if (!F.Source.Directory.empty()) { FragmentDirectory = llvm::sys::path::convert_to_slash(F.Source.Directory); if (FragmentDirectory.back() != '/') @@ -319,10 +321,18 @@ void compile(Fragment::IndexBlock::ExternalBlock &&External, llvm::SMRange BlockRange) { + if (External.Server && !Trusted) { + diag(Error, + "Remote index may not be specified by untrusted configuration. " + "Copy this into user config to use it.", + External.Server->Range); + return; + } #ifndef CLANGD_ENABLE_REMOTE if (External.Server) { - diag(Error, "Clangd isn't compiled with remote index support, ignoring " - "Server." External.Server->Range); + elog("Clangd isn't compiled with remote index support, ignoring Server: " + "{0}", + *External.Server); External.Server.reset(); } #endif @@ -488,8 +498,8 @@ trace::Span Tracer("ConfigCompile"); SPAN_ATTACH(Tracer, "ConfigFile", ConfigFile); auto Result = std::make_shared(); - vlog("Config fragment: compiling {0}:{1} -> {2}", ConfigFile, LineCol.first, - Result.get()); + vlog("Config fragment: compiling {0}:{1} -> {2} (trusted={3})", ConfigFile, + LineCol.first, Result.get(), Source.Trusted); FragmentCompiler{*Result, D, Source.Manager.get()}.compile(std::move(*this)); // Return as cheaply-copyable wrapper. diff -Nru llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/ConfigFragment.h llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/ConfigFragment.h --- llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/ConfigFragment.h 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/ConfigFragment.h 2021-07-09 07:09:47.000000000 +0000 @@ -94,6 +94,9 @@ /// Absolute path to directory the fragment is associated with. Relative /// paths mentioned in the fragment are resolved against this. std::string Directory; + /// Whether this fragment is allowed to make critical security/privacy + /// decisions. 
+ bool Trusted = false; }; SourceInfo Source; diff -Nru llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/ConfigProvider.cpp llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/ConfigProvider.cpp --- llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/ConfigProvider.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/ConfigProvider.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -34,7 +34,7 @@ : FileCache(Path), Directory(Directory) {} void get(const ThreadsafeFS &TFS, DiagnosticCallback DC, - std::chrono::steady_clock::time_point FreshTime, + std::chrono::steady_clock::time_point FreshTime, bool Trusted, std::vector &Out) const { read( TFS, FreshTime, @@ -43,6 +43,7 @@ if (Data) for (auto &Fragment : Fragment::parseYAML(*Data, path(), DC)) { Fragment.Source.Directory = Directory; + Fragment.Source.Trusted = Trusted; CachedValue.push_back(std::move(Fragment).compile(DC)); } }, @@ -52,35 +53,38 @@ std::unique_ptr Provider::fromYAMLFile(llvm::StringRef AbsPath, llvm::StringRef Directory, - const ThreadsafeFS &FS) { + const ThreadsafeFS &FS, + bool Trusted) { class AbsFileProvider : public Provider { mutable FileConfigCache Cache; // threadsafe const ThreadsafeFS &FS; + bool Trusted; std::vector getFragments(const Params &P, DiagnosticCallback DC) const override { std::vector Result; - Cache.get(FS, DC, P.FreshTime, Result); + Cache.get(FS, DC, P.FreshTime, Trusted, Result); return Result; }; public: AbsFileProvider(llvm::StringRef Path, llvm::StringRef Directory, - const ThreadsafeFS &FS) - : Cache(Path, Directory), FS(FS) { + const ThreadsafeFS &FS, bool Trusted) + : Cache(Path, Directory), FS(FS), Trusted(Trusted) { assert(llvm::sys::path::is_absolute(Path)); } }; - return std::make_unique(AbsPath, Directory, FS); + return std::make_unique(AbsPath, Directory, FS, Trusted); } std::unique_ptr Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, - const ThreadsafeFS &FS) { + const ThreadsafeFS &FS, bool Trusted) { class RelFileProvider : public Provider { std::string RelPath; const ThreadsafeFS &FS; + bool Trusted; mutable std::mutex Mu; // Keys are the (posix-style) ancestor directory, not the config within it. @@ -128,18 +132,19 @@ // This will take a (per-file) lock for each file that actually exists. std::vector Result; for (FileConfigCache *Cache : Caches) - Cache->get(FS, DC, P.FreshTime, Result); + Cache->get(FS, DC, P.FreshTime, Trusted, Result); return Result; }; public: - RelFileProvider(llvm::StringRef RelPath, const ThreadsafeFS &FS) - : RelPath(RelPath), FS(FS) { + RelFileProvider(llvm::StringRef RelPath, const ThreadsafeFS &FS, + bool Trusted) + : RelPath(RelPath), FS(FS), Trusted(Trusted) { assert(llvm::sys::path::is_relative(RelPath)); } }; - return std::make_unique(RelPath, FS); + return std::make_unique(RelPath, FS, Trusted); } std::unique_ptr diff -Nru llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/ConfigProvider.h llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/ConfigProvider.h --- llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/ConfigProvider.h 2021-02-17 08:14:28.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/ConfigProvider.h 2021-07-09 07:04:57.000000000 +0000 @@ -69,7 +69,8 @@ /// Directory will be used to resolve relative paths in the fragments. static std::unique_ptr fromYAMLFile(llvm::StringRef AbsPath, llvm::StringRef Directory, - const ThreadsafeFS &); + const ThreadsafeFS &, + bool Trusted = false); // Reads fragments from YAML files found relative to ancestors of Params.Path. 
// // All fragments that exist are returned, starting from distant ancestors. @@ -78,7 +79,8 @@ // // If Params does not specify a path, no fragments are returned. static std::unique_ptr - fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, const ThreadsafeFS &); + fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, const ThreadsafeFS &, + bool Trusted = false); /// A provider that includes fragments from all the supplied providers. /// Order is preserved; later providers take precedence over earlier ones. diff -Nru llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/quality/CompletionModel.cmake llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/quality/CompletionModel.cmake --- llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/quality/CompletionModel.cmake 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/quality/CompletionModel.cmake 2021-07-09 07:04:57.000000000 +0000 @@ -4,9 +4,10 @@ # ${CMAKE_CURRENT_BINARY_DIR}. The generated header # will define a C++ class called ${cpp_class} - which may be a # namespace-qualified class name. +set(CLANGD_COMPLETION_MODEL_COMPILER ${CMAKE_CURRENT_LIST_DIR}/CompletionModelCodegen.py) function(gen_decision_forest model filename cpp_class) - set(model_compiler ${CMAKE_SOURCE_DIR}/../clang-tools-extra/clangd/quality/CompletionModelCodegen.py) - + set(model_compiler ${CLANGD_COMPLETION_MODEL_COMPILER}) + set(output_dir ${CMAKE_CURRENT_BINARY_DIR}) set(header_file ${output_dir}/${filename}.h) set(cpp_file ${output_dir}/${filename}.cpp) diff -Nru llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/tool/ClangdMain.cpp llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/tool/ClangdMain.cpp --- llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/tool/ClangdMain.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/tool/ClangdMain.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -831,8 +831,8 @@ if (llvm::sys::path::user_config_directory(UserConfig)) { llvm::sys::path::append(UserConfig, "clangd", "config.yaml"); vlog("User config file is {0}", UserConfig); - ProviderStack.push_back( - config::Provider::fromYAMLFile(UserConfig, /*Directory=*/"", TFS)); + ProviderStack.push_back(config::Provider::fromYAMLFile( + UserConfig, /*Directory=*/"", TFS, /*Trusted=*/true)); } else { elog("Couldn't determine user config file, not loading"); } diff -Nru llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp --- llvm-toolchain-12-12.0.0/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -318,21 +318,35 @@ DiagKind(llvm::SourceMgr::DK_Warning)))); } +TEST_F(ConfigCompileTests, ExternalServerNeedsTrusted) { + Fragment::IndexBlock::ExternalBlock External; + External.Server.emplace("xxx"); + Frag.Index.External = std::move(External); + compileAndApply(); + EXPECT_THAT( + Diags.Diagnostics, + ElementsAre(DiagMessage( + "Remote index may not be specified by untrusted configuration. 
" + "Copy this into user config to use it."))); + EXPECT_FALSE(Conf.Index.External.hasValue()); +} + TEST_F(ConfigCompileTests, ExternalBlockWarnOnMultipleSource) { + Frag.Source.Trusted = true; Fragment::IndexBlock::ExternalBlock External; External.File.emplace(""); External.Server.emplace(""); Frag.Index.External = std::move(External); compileAndApply(); - llvm::StringLiteral ExpectedDiag = #ifdef CLANGD_ENABLE_REMOTE - "Exactly one of File or Server must be set."; + EXPECT_THAT( + Diags.Diagnostics, + Contains(AllOf(DiagMessage("Exactly one of File or Server must be set."), + DiagKind(llvm::SourceMgr::DK_Error)))); #else - "Clangd isn't compiled with remote index support, ignoring Server."; + ASSERT_TRUE(Conf.Index.External.hasValue()); + EXPECT_EQ(Conf.Index.External->Kind, Config::ExternalIndexSpec::File); #endif - EXPECT_THAT(Diags.Diagnostics, - Contains(AllOf(DiagMessage(ExpectedDiag), - DiagKind(llvm::SourceMgr::DK_Error)))); } TEST_F(ConfigCompileTests, ExternalBlockErrOnNoSource) { diff -Nru llvm-toolchain-12-12.0.0/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc llvm-toolchain-12-12.0.1/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc --- llvm-toolchain-12-12.0.0/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc 2021-07-09 07:04:58.000000000 +0000 @@ -370,15 +370,6 @@ #if SANITIZER_GLIBC // _(SIOCDEVPLIP, WRITE, struct_ifreq_sz); // the same as EQL_ENSLAVE - _(CYGETDEFTHRESH, WRITE, sizeof(int)); - _(CYGETDEFTIMEOUT, WRITE, sizeof(int)); - _(CYGETMON, WRITE, struct_cyclades_monitor_sz); - _(CYGETTHRESH, WRITE, sizeof(int)); - _(CYGETTIMEOUT, WRITE, sizeof(int)); - _(CYSETDEFTHRESH, NONE, 0); - _(CYSETDEFTIMEOUT, NONE, 0); - _(CYSETTHRESH, NONE, 0); - _(CYSETTIMEOUT, NONE, 0); _(EQL_EMANCIPATE, WRITE, struct_ifreq_sz); _(EQL_ENSLAVE, WRITE, struct_ifreq_sz); _(EQL_GETMASTRCFG, WRITE, struct_ifreq_sz); diff -Nru llvm-toolchain-12-12.0.0/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp llvm-toolchain-12-12.0.1/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp --- llvm-toolchain-12-12.0.0/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -143,7 +143,6 @@ # include #endif #include -#include #include #include #include @@ -459,7 +458,6 @@ #if SANITIZER_GLIBC unsigned struct_ax25_parms_struct_sz = sizeof(struct ax25_parms_struct); - unsigned struct_cyclades_monitor_sz = sizeof(struct cyclades_monitor); #if EV_VERSION > (0x010000) unsigned struct_input_keymap_entry_sz = sizeof(struct input_keymap_entry); #else @@ -823,15 +821,6 @@ #endif // SANITIZER_LINUX #if SANITIZER_LINUX && !SANITIZER_ANDROID - unsigned IOCTL_CYGETDEFTHRESH = CYGETDEFTHRESH; - unsigned IOCTL_CYGETDEFTIMEOUT = CYGETDEFTIMEOUT; - unsigned IOCTL_CYGETMON = CYGETMON; - unsigned IOCTL_CYGETTHRESH = CYGETTHRESH; - unsigned IOCTL_CYGETTIMEOUT = CYGETTIMEOUT; - unsigned IOCTL_CYSETDEFTHRESH = CYSETDEFTHRESH; - unsigned IOCTL_CYSETDEFTIMEOUT = CYSETDEFTIMEOUT; - unsigned IOCTL_CYSETTHRESH = CYSETTHRESH; - unsigned IOCTL_CYSETTIMEOUT = CYSETTIMEOUT; unsigned IOCTL_EQL_EMANCIPATE = EQL_EMANCIPATE; unsigned IOCTL_EQL_ENSLAVE = EQL_ENSLAVE; unsigned IOCTL_EQL_GETMASTRCFG = EQL_GETMASTRCFG; diff 
-Nru llvm-toolchain-12-12.0.0/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h llvm-toolchain-12-12.0.1/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h --- llvm-toolchain-12-12.0.0/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h 2021-07-09 07:04:58.000000000 +0000 @@ -983,7 +983,6 @@ #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned struct_ax25_parms_struct_sz; -extern unsigned struct_cyclades_monitor_sz; extern unsigned struct_input_keymap_entry_sz; extern unsigned struct_ipx_config_data_sz; extern unsigned struct_kbdiacrs_sz; @@ -1328,15 +1327,6 @@ #endif // SANITIZER_LINUX #if SANITIZER_LINUX && !SANITIZER_ANDROID -extern unsigned IOCTL_CYGETDEFTHRESH; -extern unsigned IOCTL_CYGETDEFTIMEOUT; -extern unsigned IOCTL_CYGETMON; -extern unsigned IOCTL_CYGETTHRESH; -extern unsigned IOCTL_CYGETTIMEOUT; -extern unsigned IOCTL_CYSETDEFTHRESH; -extern unsigned IOCTL_CYSETDEFTIMEOUT; -extern unsigned IOCTL_CYSETTHRESH; -extern unsigned IOCTL_CYSETTIMEOUT; extern unsigned IOCTL_EQL_EMANCIPATE; extern unsigned IOCTL_EQL_ENSLAVE; extern unsigned IOCTL_EQL_GETMASTRCFG; diff -Nru llvm-toolchain-12-12.0.0/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp llvm-toolchain-12-12.0.1/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp --- llvm-toolchain-12-12.0.0/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -165,7 +165,12 @@ #if !SANITIZER_GO // TODO(glider): different tools may require different altstack size. -static const uptr kAltStackSize = SIGSTKSZ * 4; // SIGSTKSZ is not enough. +static uptr GetAltStackSize() { + // Note: since GLIBC_2.31, SIGSTKSZ may be a function call, so this may be + // more costly that you think. However GetAltStackSize is only call 2-3 times + // per thread so don't cache the evaluation. + return SIGSTKSZ * 4; +} void SetAlternateSignalStack() { stack_t altstack, oldstack; @@ -176,10 +181,10 @@ // TODO(glider): the mapped stack should have the MAP_STACK flag in the // future. It is not required by man 2 sigaltstack now (they're using // malloc()). - void* base = MmapOrDie(kAltStackSize, __func__); + void *base = MmapOrDie(GetAltStackSize(), __func__); altstack.ss_sp = (char*) base; altstack.ss_flags = 0; - altstack.ss_size = kAltStackSize; + altstack.ss_size = GetAltStackSize(); CHECK_EQ(0, sigaltstack(&altstack, nullptr)); } @@ -187,7 +192,7 @@ stack_t altstack, oldstack; altstack.ss_sp = nullptr; altstack.ss_flags = SS_DISABLE; - altstack.ss_size = kAltStackSize; // Some sane value required on Darwin. + altstack.ss_size = GetAltStackSize(); // Some sane value required on Darwin. 
CHECK_EQ(0, sigaltstack(&altstack, &oldstack)); UnmapOrDie(oldstack.ss_sp, oldstack.ss_size); } diff -Nru llvm-toolchain-12-12.0.0/debian/changelog llvm-toolchain-12-12.0.1/debian/changelog --- llvm-toolchain-12-12.0.0/debian/changelog 2021-05-05 18:43:44.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/changelog 2021-07-10 10:24:48.000000000 +0000 @@ -1,8 +1,54 @@ -llvm-toolchain-12 (1:12.0.0-3~oibaf~f) focal; urgency=medium +llvm-toolchain-12 (1:12.0.1-1~oibaf~f) focal; urgency=medium * No-change backport to focal - -- Fabio Pedretti Wed, 05 May 2021 20:43:44 +0200 + -- Fabio Pedretti Sat, 10 Jul 2021 12:24:48 +0200 + +llvm-toolchain-12 (1:12.0.1-1) unstable; urgency=medium + + * New upsream release + + -- Sylvestre Ledru Fri, 09 Jul 2021 09:13:12 +0200 + +llvm-toolchain-12 (1:12.0.1~+rc4-1) unstable; urgency=medium + + * New testing upstream release + + -- Sylvestre Ledru Sat, 03 Jul 2021 00:17:36 +0200 + +llvm-toolchain-12 (1:12.0.1~+rc3-1) unstable; urgency=medium + + * New testing upstream release + * Fix the path to analyze-{cc,c++} for scan-build-py with + --intercept-first (scan-build-py-fix-analyze-path.diff) + + -- Sylvestre Ledru Sat, 26 Jun 2021 10:00:56 +0200 + +llvm-toolchain-12 (1:12.0.1~+rc2-1) unstable; urgency=medium + + * New testing upstream release + + -- Sylvestre Ledru Mon, 21 Jun 2021 16:06:36 +0200 + +llvm-toolchain-12 (1:12.0.1~+rc1-1) unstable; urgency=medium + + [ John Paul Adrian Glaubitz ] + * Add upstream patch D98574 to fix clang macro definitions on sparc64 + * Add upstream patch D98575 to fix 32-bit compiler-rt build on sparc64 + + [ Sylvestre Ledru ] + * backport D101773 to fix an llvm ir issue on mipsel + in mips/mips-D101773-reloc.patch + thanks to Ximin Luo + (Closes: #988965) + + -- Sylvestre Ledru Fri, 28 May 2021 23:27:27 +0200 + +llvm-toolchain-12 (1:12.0.1~+rc1-1~exp1) unstable; urgency=medium + + * New testing upstream release + + -- Sylvestre Ledru Wed, 26 May 2021 13:10:29 +0200 llvm-toolchain-12 (1:12.0.0-3) unstable; urgency=medium diff -Nru llvm-toolchain-12-12.0.0/debian/patches/26-set-correct-float-abi.diff llvm-toolchain-12-12.0.1/debian/patches/26-set-correct-float-abi.diff --- llvm-toolchain-12-12.0.0/debian/patches/26-set-correct-float-abi.diff 2021-02-04 12:38:25.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/26-set-correct-float-abi.diff 2021-06-21 14:43:53.000000000 +0000 @@ -18,11 +18,11 @@ Reviewed-By: Last-Update: -Index: llvm-toolchain-snapshot_12~++20201021043814+26459e6d8ee/clang/lib/Driver/ToolChains/Arch/ARM.cpp +Index: llvm-toolchain-12_12.0.1~+rc2/clang/lib/Driver/ToolChains/Arch/ARM.cpp =================================================================== ---- llvm-toolchain-snapshot_12~++20201021043814+26459e6d8ee.orig/clang/lib/Driver/ToolChains/Arch/ARM.cpp -+++ llvm-toolchain-snapshot_12~++20201021043814+26459e6d8ee/clang/lib/Driver/ToolChains/Arch/ARM.cpp -@@ -214,7 +214,7 @@ arm::FloatABI arm::getDefaultFloatABI(co +--- llvm-toolchain-12_12.0.1~+rc2.orig/clang/lib/Driver/ToolChains/Arch/ARM.cpp ++++ llvm-toolchain-12_12.0.1~+rc2/clang/lib/Driver/ToolChains/Arch/ARM.cpp +@@ -220,7 +220,7 @@ arm::FloatABI arm::getDefaultFloatABI(co case llvm::Triple::MuslEABI: case llvm::Triple::EABI: // EABI is always AAPCS, and if it was not marked 'hard', it's softfp diff -Nru llvm-toolchain-12-12.0.0/debian/patches/mips/mips-D101773-reloc.patch llvm-toolchain-12-12.0.1/debian/patches/mips/mips-D101773-reloc.patch --- llvm-toolchain-12-12.0.0/debian/patches/mips/mips-D101773-reloc.patch 1970-01-01 00:00:00.000000000 
+0000 +++ llvm-toolchain-12-12.0.1/debian/patches/mips/mips-D101773-reloc.patch 2021-06-21 14:43:53.000000000 +0000 @@ -0,0 +1,48 @@ +From ab40c027f0ce9492919a72ad339de40bdb84b354 Mon Sep 17 00:00:00 2001 +From: Dimitry Andric +Date: Mon, 3 May 2021 20:08:49 +0200 +Subject: [PATCH] [MC][ELF] Work around R_MIPS_LO16 relocation handling problem + +This fixes PR49821, and avoids "ld.lld: error: test.o:(.rodata.str1.1): +offset is outside the section" errors when linking MIPS objects with +negative R_MIPS_LO16 implicit addends. + +ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so it +doesn't know that an R_MIPS_HI16 with implicit addend 1 and an +R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in +range of a MergeInputSection. We could introduce a new RelExpr member +(like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12) +but the complexity is unnecessary given that GNU as keeps the original +symbol for this case as well. + +Reviewed By: atanasyan, MaskRay + +Differential Revision: https://reviews.llvm.org/D101773 +--- + llvm/lib/MC/ELFObjectWriter.cpp | 11 +++++++++++ + llvm/test/MC/Mips/mips_lo16.s | 22 ++++++++++++++++++++++ + 2 files changed, 33 insertions(+) + create mode 100644 llvm/test/MC/Mips/mips_lo16.s + +Index: llvm-toolchain-12_12.0.1~+rc2/llvm/lib/MC/ELFObjectWriter.cpp +=================================================================== +--- llvm-toolchain-12_12.0.1~+rc2.orig/llvm/lib/MC/ELFObjectWriter.cpp ++++ llvm-toolchain-12_12.0.1~+rc2/llvm/lib/MC/ELFObjectWriter.cpp +@@ -1408,6 +1408,17 @@ bool ELFObjectWriter::shouldRelocateWith + if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS && + !hasRelocationAddend()) + return true; ++ ++ // ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so ++ // it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an ++ // R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in ++ // range of a MergeInputSection. We could introduce a new RelExpr member ++ // (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12) ++ // but the complexity is unnecessary given that GNU as keeps the original ++ // symbol for this case as well. ++ if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS && ++ !hasRelocationAddend()) ++ return true; + } + + // Most TLS relocations use a got, so they need the symbol. Even those that diff -Nru llvm-toolchain-12-12.0.0/debian/patches/mips/mips-force-nomadd4.patch llvm-toolchain-12-12.0.1/debian/patches/mips/mips-force-nomadd4.patch --- llvm-toolchain-12-12.0.0/debian/patches/mips/mips-force-nomadd4.patch 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/mips/mips-force-nomadd4.patch 2021-06-21 14:43:53.000000000 +0000 @@ -0,0 +1,66 @@ +The MIPS port aims to support the Loongson 3 family of CPUs in addition +of the other MIPS CPUs. On the Loongson 3 family the MADD4 instructions +are fused, while they are not fused on the other MIPS CPUs. In order to +support both, we have to disabled those instructions. + +For that, the patch below basically corresponds to the --with-madd4=no +used on the GCC side. 
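As a side note on the D101773 change quoted above: the hi16/lo16 pair it describes follows the usual MIPS %hi/%lo convention, where the lo16 half is sign-extended and the hi16 half is biased by 0x8000. The standalone C++ sketch below uses hypothetical names (HiLo, splitHiLo) and is not LLVM code; it only shows how an offset of 32768 ends up as the hi16 addend 1 / lo16 addend -32768 pair mentioned in the commit message.

#include <cstdint>
#include <cstdio>

// Hypothetical helper, not LLVM's: split a 32-bit offset into the implicit
// addends carried by an R_MIPS_HI16 / R_MIPS_LO16 pair. The lo16 half is
// sign-extended at link time, so the hi16 half is biased by +0x8000.
struct HiLo {
  int Hi; // R_MIPS_HI16 addend (shifted left by 16 when resolved)
  int Lo; // R_MIPS_LO16 addend (sign-extended when resolved)
};

static HiLo splitHiLo(uint32_t Offset) {
  HiLo P;
  P.Lo = static_cast<int16_t>(Offset & 0xffff);     // 0x8000 -> -32768
  P.Hi = static_cast<int>((Offset + 0x8000) >> 16); // 0x8000 -> 1
  return P;
}

int main() {
  HiLo P = splitHiLo(32768);
  // A linker reconstructs the offset as (hi << 16) + lo = 65536 - 32768.
  std::printf("hi=%d lo=%d reconstructed=%d\n", P.Hi, P.Lo, (P.Hi << 16) + P.Lo);
  return 0;
}

Because ld.lld resolves the two relocations independently, it sees only the bare -32768 lo16 addend once the section symbol is substituted, which is what triggers the "offset is outside the section" error described in the commit message; keeping the original symbol, as the patch does, avoids that.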
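For the Loongson workaround described above, the refreshed mips/mips-force-nomadd4.patch flips the DisableMadd4 default to true and then lets the "+nomadd4" / "-nomadd4" target features (plus an R6 carve-out) adjust it. The standalone C++ sketch below uses hypothetical names (MipsState, applyMadd4Policy, splitFeatures) and an assumed ordering of the two adjustments; it only mirrors the feature-string handling added by the hunks that follow and is not the actual MipsSubtarget code.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct MipsState {
  bool DisableMadd4 = true; // patched default: 4-operand madd.fmt disabled
  bool IsMipsR6 = false;    // R6 subtargets are excluded from the workaround
};

// Split a comma-separated feature string such as "+msa,-nomadd4".
static std::vector<std::string> splitFeatures(const std::string &FS) {
  std::vector<std::string> Out;
  std::stringstream SS(FS);
  for (std::string Item; std::getline(SS, Item, ',');)
    if (!Item.empty())
      Out.push_back(Item);
  return Out;
}

// Ordering of the R6 carve-out vs. the explicit flags is an assumption here.
static void applyMadd4Policy(MipsState &ST, const std::string &FS) {
  if (ST.IsMipsR6)
    ST.DisableMadd4 = false;
  for (const std::string &Feature : splitFeatures(FS)) {
    if (Feature == "+nomadd4")
      ST.DisableMadd4 = true;
    else if (Feature == "-nomadd4")
      ST.DisableMadd4 = false;
  }
}

int main() {
  MipsState ST;
  applyMadd4Policy(ST, "+msa,-nomadd4"); // explicit opt back in to madd.fmt
  std::cout << std::boolalpha << "DisableMadd4 = " << ST.DisableMadd4 << "\n";
  return 0;
}

Driving the sketch with "+msa,-nomadd4" shows an explicit opt-in overriding the new default, which is the behaviour the added loop over Features.getFeatures() in the patch is there to provide.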
+ +Index: llvm-toolchain-snapshot_12~++20201031095131+aab71d44431/clang/lib/Basic/Targets/Mips.h +=================================================================== +--- llvm-toolchain-snapshot_12~++20201031095131+aab71d44431.orig/clang/lib/Basic/Targets/Mips.h ++++ llvm-toolchain-snapshot_12~++20201031095131+aab71d44431/clang/lib/Basic/Targets/Mips.h +@@ -332,6 +332,8 @@ public: + HasMSA = true; + else if (Feature == "+nomadd4") + DisableMadd4 = true; ++ else if (Feature == "-nomadd4") ++ DisableMadd4 = false; + else if (Feature == "+fp64") + FPMode = FP64; + else if (Feature == "-fp64") +Index: llvm-toolchain-snapshot_12~++20201031095131+aab71d44431/llvm/lib/Target/Mips/MipsSubtarget.cpp +=================================================================== +--- llvm-toolchain-snapshot_12~++20201031095131+aab71d44431.orig/llvm/lib/Target/Mips/MipsSubtarget.cpp ++++ llvm-toolchain-snapshot_12~++20201031095131+aab71d44431/llvm/lib/Target/Mips/MipsSubtarget.cpp +@@ -80,7 +80,7 @@ MipsSubtarget::MipsSubtarget(const Tripl + InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), + HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), + Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), +- HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false), ++ HasEVA(false), DisableMadd4(true), HasMT(false), HasCRC(false), + HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false), + StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT), + TSInfo(), InstrInfo(MipsInstrInfo::create( +@@ -91,6 +91,9 @@ MipsSubtarget::MipsSubtarget(const Tripl + if (MipsArchVersion == MipsDefault) + MipsArchVersion = Mips32; + ++ if (hasMips32r6() || hasMips64r6()) ++ DisableMadd4 = false; ++ + // Don't even attempt to generate code for MIPS-I and MIPS-V. They have not + // been tested and currently exist for the integrated assembler only. + if (MipsArchVersion == Mips1) +@@ -238,6 +241,7 @@ MipsSubtarget & + MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM) { + StringRef CPUName = MIPS_MC::selectMipsCPU(TM.getTargetTriple(), CPU); ++ SubtargetFeatures Features(FS); + + // Parse features string. 
+ ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); +@@ -260,6 +264,13 @@ MipsSubtarget::initializeSubtargetDepend + report_fatal_error("64-bit code requested on a subtarget that doesn't " + "support it!"); + ++ for (const std::string &Feature : Features.getFeatures()) { ++ if (Feature == "+nomadd4") ++ DisableMadd4 = true; ++ else if (Feature == "-nomadd4") ++ DisableMadd4 = false; ++ } ++ + return *this; + } + diff -Nru llvm-toolchain-12-12.0.0/debian/patches/mips/mips-fpxx-enable.diff llvm-toolchain-12-12.0.1/debian/patches/mips/mips-fpxx-enable.diff --- llvm-toolchain-12-12.0.0/debian/patches/mips/mips-fpxx-enable.diff 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/mips/mips-fpxx-enable.diff 2021-06-21 14:43:53.000000000 +0000 @@ -0,0 +1,17 @@ +Index: llvm-toolchain-snapshot_11~++20200326111000+4673699a470/clang/lib/Driver/ToolChains/Arch/Mips.cpp +=================================================================== +--- llvm-toolchain-snapshot_11~++20200326111000+4673699a470.orig/clang/lib/Driver/ToolChains/Arch/Mips.cpp ++++ llvm-toolchain-snapshot_11~++20200326111000+4673699a470/clang/lib/Driver/ToolChains/Arch/Mips.cpp +@@ -468,10 +468,10 @@ bool mips::isFP64ADefault(const llvm::Tr + + bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName, + StringRef ABIName, mips::FloatABI FloatABI) { +- if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies && ++/* if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies && + Triple.getVendor() != llvm::Triple::MipsTechnologies && + !Triple.isAndroid()) +- return false; ++ return false;*/ + + if (ABIName != "32") + return false; diff -Nru llvm-toolchain-12-12.0.0/debian/patches/mips-force-nomadd4.diff llvm-toolchain-12-12.0.1/debian/patches/mips-force-nomadd4.diff --- llvm-toolchain-12-12.0.0/debian/patches/mips-force-nomadd4.diff 2021-02-04 12:38:25.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/mips-force-nomadd4.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,92 +0,0 @@ -The MIPS port aims to support the Loongson 3 family of CPUs in addition -of the other MIPS CPUs. On the Loongson 3 family the MADD4 instructions -are fused, while they are not fused on the other MIPS CPUs. In order to -support both, we have to disabled those instructions. - -For that, the patch below basically corresponds to the --with-madd4=no -used on the GCC side. 
- -Index: llvm-toolchain-11-11.0.0/clang/lib/Basic/Targets/Mips.h -=================================================================== ---- llvm-toolchain-11-11.0.0.orig/clang/lib/Basic/Targets/Mips.h -+++ llvm-toolchain-11-11.0.0/clang/lib/Basic/Targets/Mips.h -@@ -332,6 +332,8 @@ public: - HasMSA = true; - else if (Feature == "+nomadd4") - DisableMadd4 = true; -+ else if (Feature == "-nomadd4") -+ DisableMadd4 = false; - else if (Feature == "+fp64") - FPMode = FP64; - else if (Feature == "-fp64") -Index: llvm-toolchain-11-11.0.0/llvm/lib/Target/Mips/MipsSubtarget.cpp -=================================================================== ---- llvm-toolchain-11-11.0.0.orig/llvm/lib/Target/Mips/MipsSubtarget.cpp -+++ llvm-toolchain-11-11.0.0/llvm/lib/Target/Mips/MipsSubtarget.cpp -@@ -79,7 +79,7 @@ MipsSubtarget::MipsSubtarget(const Tripl - InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), - InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false), - AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), -- UseTCCInDIV(false), HasSym32(false), HasEVA(false), DisableMadd4(false), -+ UseTCCInDIV(false), HasSym32(false), HasEVA(false), DisableMadd4(true), - HasMT(false), HasCRC(false), HasVirt(false), HasGINV(false), - UseIndirectJumpsHazard(false), StackAlignOverride(StackAlignOverride), - TM(TM), TargetTriple(TT), TSInfo(), -@@ -91,6 +91,9 @@ MipsSubtarget::MipsSubtarget(const Tripl - if (MipsArchVersion == MipsDefault) - MipsArchVersion = Mips32; - -+ if (hasMips32r6() || hasMips64r6()) -+ DisableMadd4 = false; -+ - // Don't even attempt to generate code for MIPS-I and MIPS-V. They have not - // been tested and currently exist for the integrated assembler only. - if (MipsArchVersion == Mips1) -@@ -238,6 +241,7 @@ MipsSubtarget & - MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, - const TargetMachine &TM) { - StringRef CPUName = MIPS_MC::selectMipsCPU(TM.getTargetTriple(), CPU); -+ SubtargetFeatures Features(FS); - - // Parse features string. 
- ParseSubtargetFeatures(CPUName, FS); -@@ -260,6 +264,13 @@ MipsSubtarget::initializeSubtargetDepend - report_fatal_error("64-bit code requested on a subtarget that doesn't " - "support it!"); - -+ for (const std::string &Feature : Features.getFeatures()) { -+ if (Feature == "+nomadd4") -+ DisableMadd4 = true; -+ else if (Feature == "-nomadd4") -+ DisableMadd4 = false; -+ } -+ - return *this; - } - -Index: llvm-toolchain-11-11.0.0/llvm/lib/Target/Mips/Mips.td -=================================================================== ---- llvm-toolchain-11-11.0.0.orig/llvm/lib/Target/Mips/Mips.td -+++ llvm-toolchain-11-11.0.0/llvm/lib/Target/Mips/Mips.td -@@ -191,7 +191,7 @@ def FeatureUseTCCInDIV : SubtargetFeatur - "UseTCCInDIV", "false", - "Force the assembler to use trapping">; - --def FeatureMadd4 -+def FeatureNoMadd4 - : SubtargetFeature<"nomadd4", "DisableMadd4", "true", - "Disable 4-operand madd.fmt and related instructions">; - -Index: llvm-toolchain-11-11.0.0/llvm/lib/Target/Mips/MipsInstrInfo.td -=================================================================== ---- llvm-toolchain-11-11.0.0.orig/llvm/lib/Target/Mips/MipsInstrInfo.td -+++ llvm-toolchain-11-11.0.0/llvm/lib/Target/Mips/MipsInstrInfo.td -@@ -242,7 +242,7 @@ def HasEVA : Predicate<"Subtarg - def HasMSA : Predicate<"Subtarget->hasMSA()">, - AssemblerPredicate<(all_of FeatureMSA)>; - def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, -- AssemblerPredicate<(all_of (not FeatureMadd4))>; -+ AssemblerPredicate<(all_of (not FeatureNoMadd4))>; - def HasMT : Predicate<"Subtarget->hasMT()">, - AssemblerPredicate<(all_of FeatureMT)>; - def UseIndirectJumpsHazard : Predicate<"Subtarget->useIndirectJumpsHazard()">, diff -Nru llvm-toolchain-12-12.0.0/debian/patches/mips-force-nomadd4.patch llvm-toolchain-12-12.0.1/debian/patches/mips-force-nomadd4.patch --- llvm-toolchain-12-12.0.0/debian/patches/mips-force-nomadd4.patch 2021-02-04 12:38:25.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/mips-force-nomadd4.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,66 +0,0 @@ -The MIPS port aims to support the Loongson 3 family of CPUs in addition -of the other MIPS CPUs. On the Loongson 3 family the MADD4 instructions -are fused, while they are not fused on the other MIPS CPUs. In order to -support both, we have to disabled those instructions. - -For that, the patch below basically corresponds to the --with-madd4=no -used on the GCC side. 
- -Index: llvm-toolchain-snapshot_12~++20201031095131+aab71d44431/clang/lib/Basic/Targets/Mips.h -=================================================================== ---- llvm-toolchain-snapshot_12~++20201031095131+aab71d44431.orig/clang/lib/Basic/Targets/Mips.h -+++ llvm-toolchain-snapshot_12~++20201031095131+aab71d44431/clang/lib/Basic/Targets/Mips.h -@@ -332,6 +332,8 @@ public: - HasMSA = true; - else if (Feature == "+nomadd4") - DisableMadd4 = true; -+ else if (Feature == "-nomadd4") -+ DisableMadd4 = false; - else if (Feature == "+fp64") - FPMode = FP64; - else if (Feature == "-fp64") -Index: llvm-toolchain-snapshot_12~++20201031095131+aab71d44431/llvm/lib/Target/Mips/MipsSubtarget.cpp -=================================================================== ---- llvm-toolchain-snapshot_12~++20201031095131+aab71d44431.orig/llvm/lib/Target/Mips/MipsSubtarget.cpp -+++ llvm-toolchain-snapshot_12~++20201031095131+aab71d44431/llvm/lib/Target/Mips/MipsSubtarget.cpp -@@ -80,7 +80,7 @@ MipsSubtarget::MipsSubtarget(const Tripl - InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), - HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), - Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), -- HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false), -+ HasEVA(false), DisableMadd4(true), HasMT(false), HasCRC(false), - HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false), - StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT), - TSInfo(), InstrInfo(MipsInstrInfo::create( -@@ -91,6 +91,9 @@ MipsSubtarget::MipsSubtarget(const Tripl - if (MipsArchVersion == MipsDefault) - MipsArchVersion = Mips32; - -+ if (hasMips32r6() || hasMips64r6()) -+ DisableMadd4 = false; -+ - // Don't even attempt to generate code for MIPS-I and MIPS-V. They have not - // been tested and currently exist for the integrated assembler only. - if (MipsArchVersion == Mips1) -@@ -238,6 +241,7 @@ MipsSubtarget & - MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, - const TargetMachine &TM) { - StringRef CPUName = MIPS_MC::selectMipsCPU(TM.getTargetTriple(), CPU); -+ SubtargetFeatures Features(FS); - - // Parse features string. 
- ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); -@@ -260,6 +264,13 @@ MipsSubtarget::initializeSubtargetDepend - report_fatal_error("64-bit code requested on a subtarget that doesn't " - "support it!"); - -+ for (const std::string &Feature : Features.getFeatures()) { -+ if (Feature == "+nomadd4") -+ DisableMadd4 = true; -+ else if (Feature == "-nomadd4") -+ DisableMadd4 = false; -+ } -+ - return *this; - } - diff -Nru llvm-toolchain-12-12.0.0/debian/patches/mips-fpxx-enable.diff llvm-toolchain-12-12.0.1/debian/patches/mips-fpxx-enable.diff --- llvm-toolchain-12-12.0.0/debian/patches/mips-fpxx-enable.diff 2021-02-04 12:38:25.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/mips-fpxx-enable.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -Index: llvm-toolchain-snapshot_11~++20200326111000+4673699a470/clang/lib/Driver/ToolChains/Arch/Mips.cpp -=================================================================== ---- llvm-toolchain-snapshot_11~++20200326111000+4673699a470.orig/clang/lib/Driver/ToolChains/Arch/Mips.cpp -+++ llvm-toolchain-snapshot_11~++20200326111000+4673699a470/clang/lib/Driver/ToolChains/Arch/Mips.cpp -@@ -468,10 +468,10 @@ bool mips::isFP64ADefault(const llvm::Tr - - bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName, - StringRef ABIName, mips::FloatABI FloatABI) { -- if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies && -+/* if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies && - Triple.getVendor() != llvm::Triple::MipsTechnologies && - !Triple.isAndroid()) -- return false; -+ return false;*/ - - if (ABIName != "32") - return false; diff -Nru llvm-toolchain-12-12.0.0/debian/patches/scan-build-py-fix-analyze-path.diff llvm-toolchain-12-12.0.1/debian/patches/scan-build-py-fix-analyze-path.diff --- llvm-toolchain-12-12.0.0/debian/patches/scan-build-py-fix-analyze-path.diff 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/scan-build-py-fix-analyze-path.diff 2021-06-23 16:15:31.000000000 +0000 @@ -0,0 +1,15 @@ +Index: llvm-toolchain-12-12.0.1~+rc2/clang/tools/scan-build-py/libscanbuild/analyze.py +=================================================================== +--- llvm-toolchain-12-12.0.1~+rc2.orig/clang/tools/scan-build-py/libscanbuild/analyze.py ++++ llvm-toolchain-12-12.0.1~+rc2/clang/tools/scan-build-py/libscanbuild/analyze.py +@@ -39,8 +39,8 @@ from libscanbuild.shell import decode + + __all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper'] + +-COMPILER_WRAPPER_CC = 'analyze-cc' +-COMPILER_WRAPPER_CXX = 'analyze-c++' ++COMPILER_WRAPPER_CC = '/usr/share/clang/scan-build-py-12/bin/analyze-cc' ++COMPILER_WRAPPER_CXX = '/usr/share/clang/scan-build-py-12/bin/analyze-c++' + + CTU_EXTDEF_MAP_FILENAME = 'externalDefMap.txt' + CTU_TEMP_DEFMAP_FOLDER = 'tmpExternalDefMaps' diff -Nru llvm-toolchain-12-12.0.0/debian/patches/series llvm-toolchain-12-12.0.1/debian/patches/series --- llvm-toolchain-12-12.0.0/debian/patches/series 2021-03-30 06:54:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/series 2021-06-23 16:15:31.000000000 +0000 @@ -81,8 +81,10 @@ libcxx/libcxx-silent-failure-arm64.diff # Change default optims -mips-fpxx-enable.diff -mips-force-nomadd4.patch +mips/mips-fpxx-enable.diff +mips/mips-force-nomadd4.patch +mips/mips-D101773-reloc.patch + 26-set-correct-float-abi.diff clang-baseline-fix-i386.patch disable-sse2-old-x86.diff @@ -121,6 +123,10 @@ # riscv64 risc/clang-riscv64-multiarch.diff +# sparc64 
+sparc/sparc-D98574-clang-def.patch +sparc/sparc-D98575-compiler-rt.patch + # Compiler-rt - workaround workaround-bug-42994-use-linker.diff @@ -134,5 +140,6 @@ print-lldb-path.patch libcxx-armhf-ftbfs.diff lld-use-link-atomic-i386.diff -bring-reporter-back.patch +#bring-reporter-back.patch D99501-ignore-lto-auto.diff +scan-build-py-fix-analyze-path.diff diff -Nru llvm-toolchain-12-12.0.0/debian/patches/sparc/sparc-D98574-clang-def.patch llvm-toolchain-12-12.0.1/debian/patches/sparc/sparc-D98574-clang-def.patch --- llvm-toolchain-12-12.0.0/debian/patches/sparc/sparc-D98574-clang-def.patch 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/sparc/sparc-D98574-clang-def.patch 2021-06-21 14:43:53.000000000 +0000 @@ -0,0 +1,3762 @@ +--- llvm-toolchain-12-12.0.1~+rc1.orig/clang/lib/Basic/Targets/Sparc.cpp ++++ llvm-toolchain-12-12.0.1~+rc1/clang/lib/Basic/Targets/Sparc.cpp +@@ -156,8 +156,6 @@ void SparcV8TargetInfo::getTargetDefines + Builder.defineMacro("__sparcv8__"); + break; + case CG_V9: +- Builder.defineMacro("__sparcv9"); +- Builder.defineMacro("__sparcv9__"); + Builder.defineMacro("__sparc_v9__"); + break; + } +@@ -239,13 +237,25 @@ void SparcV8TargetInfo::getTargetDefines + void SparcV9TargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + SparcTargetInfo::getTargetDefines(Opts, Builder); +- Builder.defineMacro("__sparcv9"); + Builder.defineMacro("__arch64__"); +- // Solaris doesn't need these variants, but the BSDs do. +- if (getTriple().getOS() != llvm::Triple::Solaris) { ++ switch (getTriple().getOS()) { ++ case llvm::Triple::Linux: ++ Builder.defineMacro("__sparc_v9__"); ++ break; ++ case llvm::Triple::FreeBSD: ++ case llvm::Triple::NetBSD: + Builder.defineMacro("__sparc64__"); + Builder.defineMacro("__sparc_v9__"); ++ Builder.defineMacro("__sparcv9"); ++ break; ++ case llvm::Triple::OpenBSD: ++ Builder.defineMacro("__sparc64__"); + Builder.defineMacro("__sparcv9__"); ++ Builder.defineMacro("__sparc_v9__"); ++ break; ++ case llvm::Triple::Solaris: ++ Builder.defineMacro("__sparcv9"); ++ break; + } + + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); +--- llvm-toolchain-12-12.0.1~+rc1.orig/clang/test/Preprocessor/predefined-arch-macros.c ++++ llvm-toolchain-12-12.0.1~+rc1/clang/test/Preprocessor/predefined-arch-macros.c +@@ -3418,11 +3418,8 @@ + // CHECK_SPARC: #define __BIG_ENDIAN__ 1 + // CHECK_SPARC: #define __sparc 1 + // CHECK_SPARC: #define __sparc__ 1 +-// CHECK_SPARC-NOT: #define __sparcv9 1 +-// CHECK_SPARC-NOT: #define __sparcv9__ 1 + // CHECK_SPARC: #define __sparcv8 1 +-// CHECK_SPARC-NOT: #define __sparcv9 1 +-// CHECK_SPARC-NOT: #define __sparcv9__ 1 ++// CHECK_SPARC-NOT: #define __sparc_v9__ 1 + + // RUN: %clang -mcpu=v9 -E -dM %s -o - 2>&1 \ + // RUN: -target sparc-unknown-linux \ +@@ -3430,8 +3427,6 @@ + // CHECK_SPARC-V9-NOT: #define __sparcv8 1 + // CHECK_SPARC-V9-NOT: #define __sparcv8__ 1 + // CHECK_SPARC-V9: #define __sparc_v9__ 1 +-// CHECK_SPARC-V9: #define __sparcv9 1 +-// CHECK_SPARC-V9: #define __sparcv9__ 1 + + // RUN: %clang -E -dM %s -o - 2>&1 \ + // RUN: -target sparc-sun-solaris \ +@@ -3518,12 +3513,10 @@ + // RUN: -target sparcv9-unknown-linux \ + // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARCV9 + // CHECK_SPARCV9: #define __BIG_ENDIAN__ 1 ++// CHECK_SPARCV9: #define __arch64__ 1 + // CHECK_SPARCV9: #define __sparc 1 +-// CHECK_SPARCV9: #define __sparc64__ 1 + // CHECK_SPARCV9: #define __sparc__ 1 + // CHECK_SPARCV9: #define __sparc_v9__ 1 +-// CHECK_SPARCV9: 
#define __sparcv9 1 +-// CHECK_SPARCV9: #define __sparcv9__ 1 + + // RUN: %clang -E -dM %s -o - 2>&1 \ + // RUN: -target sparcv9-unknown-linux \ +--- /dev/null ++++ llvm-toolchain-12-12.0.1~+rc1/clang/test/Preprocessor/predefined-arch-macros.c.orig +@@ -0,0 +1,3681 @@ ++// REQUIRES: amdgpu-registered-target ++// Begin X86/GCC/Linux tests ---------------- ++ ++// RUN: %clang -march=i386 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_I386_M32 ++// CHECK_I386_M32: #define __LAHF_SAHF__ 1 ++// CHECK_I386_M32: #define __i386 1 ++// CHECK_I386_M32: #define __i386__ 1 ++// CHECK_I386_M32: #define __tune_i386__ 1 ++// CHECK_I386_M32: #define i386 1 ++// RUN: not %clang -march=i386 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_I386_M64 ++// CHECK_I386_M64: error: {{.*}} ++ ++// RUN: %clang -march=i486 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_I486_M32 ++// CHECK_I486_M32: #define __i386 1 ++// CHECK_I486_M32: #define __i386__ 1 ++// CHECK_I486_M32: #define __i486 1 ++// CHECK_I486_M32: #define __i486__ 1 ++// CHECK_I486_M32: #define __tune_i486__ 1 ++// CHECK_I486_M32: #define i386 1 ++// RUN: not %clang -march=i486 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_I486_M64 ++// CHECK_I486_M64: error: {{.*}} ++ ++// RUN: %clang -march=i586 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_I586_M32 ++// CHECK_I586_M32: #define __i386 1 ++// CHECK_I586_M32: #define __i386__ 1 ++// CHECK_I586_M32: #define __i586 1 ++// CHECK_I586_M32: #define __i586__ 1 ++// CHECK_I586_M32: #define __pentium 1 ++// CHECK_I586_M32: #define __pentium__ 1 ++// CHECK_I586_M32: #define __tune_i586__ 1 ++// CHECK_I586_M32: #define __tune_pentium__ 1 ++// CHECK_I586_M32: #define i386 1 ++// RUN: not %clang -march=i586 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_I586_M64 ++// CHECK_I586_M64: error: {{.*}} ++ ++// RUN: %clang -march=pentium -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM_M32 ++// CHECK_PENTIUM_M32: #define __i386 1 ++// CHECK_PENTIUM_M32: #define __i386__ 1 ++// CHECK_PENTIUM_M32: #define __i586 1 ++// CHECK_PENTIUM_M32: #define __i586__ 1 ++// CHECK_PENTIUM_M32: #define __pentium 1 ++// CHECK_PENTIUM_M32: #define __pentium__ 1 ++// CHECK_PENTIUM_M32: #define __tune_i586__ 1 ++// CHECK_PENTIUM_M32: #define __tune_pentium__ 1 ++// CHECK_PENTIUM_M32: #define i386 1 ++// RUN: not %clang -march=pentium -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM_M64 ++// CHECK_PENTIUM_M64: error: {{.*}} ++ ++// RUN: %clang -march=pentium-mmx -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM_MMX_M32 ++// CHECK_PENTIUM_MMX_M32: #define __MMX__ 1 ++// CHECK_PENTIUM_MMX_M32: #define __i386 1 ++// CHECK_PENTIUM_MMX_M32: #define __i386__ 1 ++// CHECK_PENTIUM_MMX_M32: #define __i586 1 ++// CHECK_PENTIUM_MMX_M32: #define __i586__ 1 ++// CHECK_PENTIUM_MMX_M32: #define __pentium 1 ++// 
CHECK_PENTIUM_MMX_M32: #define __pentium__ 1 ++// CHECK_PENTIUM_MMX_M32: #define __pentium_mmx__ 1 ++// CHECK_PENTIUM_MMX_M32: #define __tune_i586__ 1 ++// CHECK_PENTIUM_MMX_M32: #define __tune_pentium__ 1 ++// CHECK_PENTIUM_MMX_M32: #define __tune_pentium_mmx__ 1 ++// CHECK_PENTIUM_MMX_M32: #define i386 1 ++// RUN: not %clang -march=pentium-mmx -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM_MMX_M64 ++// CHECK_PENTIUM_MMX_M64: error: {{.*}} ++ ++// RUN: %clang -march=winchip-c6 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_WINCHIP_C6_M32 ++// CHECK_WINCHIP_C6_M32: #define __MMX__ 1 ++// CHECK_WINCHIP_C6_M32: #define __i386 1 ++// CHECK_WINCHIP_C6_M32: #define __i386__ 1 ++// CHECK_WINCHIP_C6_M32: #define __i486 1 ++// CHECK_WINCHIP_C6_M32: #define __i486__ 1 ++// CHECK_WINCHIP_C6_M32: #define __tune_i486__ 1 ++// CHECK_WINCHIP_C6_M32: #define i386 1 ++// RUN: not %clang -march=winchip-c6 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_WINCHIP_C6_M64 ++// CHECK_WINCHIP_C6_M64: error: {{.*}} ++ ++// RUN: %clang -march=winchip2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_WINCHIP2_M32 ++// CHECK_WINCHIP2_M32: #define __3dNOW__ 1 ++// CHECK_WINCHIP2_M32: #define __MMX__ 1 ++// CHECK_WINCHIP2_M32: #define __i386 1 ++// CHECK_WINCHIP2_M32: #define __i386__ 1 ++// CHECK_WINCHIP2_M32: #define __i486 1 ++// CHECK_WINCHIP2_M32: #define __i486__ 1 ++// CHECK_WINCHIP2_M32: #define __tune_i486__ 1 ++// CHECK_WINCHIP2_M32: #define i386 1 ++// RUN: not %clang -march=winchip2 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_WINCHIP2_M64 ++// CHECK_WINCHIP2_M64: error: {{.*}} ++ ++// RUN: %clang -march=c3 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_C3_M32 ++// CHECK_C3_M32: #define __3dNOW__ 1 ++// CHECK_C3_M32: #define __MMX__ 1 ++// CHECK_C3_M32: #define __i386 1 ++// CHECK_C3_M32: #define __i386__ 1 ++// CHECK_C3_M32: #define __i486 1 ++// CHECK_C3_M32: #define __i486__ 1 ++// CHECK_C3_M32: #define __tune_i486__ 1 ++// CHECK_C3_M32: #define i386 1 ++// RUN: not %clang -march=c3 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_C3_M64 ++// CHECK_C3_M64: error: {{.*}} ++ ++// RUN: %clang -march=c3-2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_C3_2_M32 ++// CHECK_C3_2_M32: #define __MMX__ 1 ++// CHECK_C3_2_M32: #define __SSE__ 1 ++// CHECK_C3_2_M32: #define __i386 1 ++// CHECK_C3_2_M32: #define __i386__ 1 ++// CHECK_C3_2_M32: #define __i686 1 ++// CHECK_C3_2_M32: #define __i686__ 1 ++// CHECK_C3_2_M32: #define __pentiumpro 1 ++// CHECK_C3_2_M32: #define __pentiumpro__ 1 ++// CHECK_C3_2_M32: #define __tune_i686__ 1 ++// CHECK_C3_2_M32: #define __tune_pentium2__ 1 ++// CHECK_C3_2_M32: #define __tune_pentiumpro__ 1 ++// CHECK_C3_2_M32: #define i386 1 ++// RUN: not %clang -march=c3-2 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_C3_2_M64 ++// CHECK_C3_2_M64: error: {{.*}} ++ 
++// RUN: %clang -march=i686 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_I686_M32 ++// CHECK_I686_M32: #define __i386 1 ++// CHECK_I686_M32: #define __i386__ 1 ++// CHECK_I686_M32: #define __i686 1 ++// CHECK_I686_M32: #define __i686__ 1 ++// CHECK_I686_M32: #define __pentiumpro 1 ++// CHECK_I686_M32: #define __pentiumpro__ 1 ++// CHECK_I686_M32: #define __tune_i686__ 1 ++// CHECK_I686_M32: #define __tune_pentiumpro__ 1 ++// CHECK_I686_M32: #define i386 1 ++// RUN: not %clang -march=i686 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_I686_M64 ++// CHECK_I686_M64: error: {{.*}} ++ ++// RUN: %clang -march=pentiumpro -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUMPRO_M32 ++// CHECK_PENTIUMPRO_M32: #define __i386 1 ++// CHECK_PENTIUMPRO_M32: #define __i386__ 1 ++// CHECK_PENTIUMPRO_M32: #define __i686 1 ++// CHECK_PENTIUMPRO_M32: #define __i686__ 1 ++// CHECK_PENTIUMPRO_M32: #define __pentiumpro 1 ++// CHECK_PENTIUMPRO_M32: #define __pentiumpro__ 1 ++// CHECK_PENTIUMPRO_M32: #define __tune_i686__ 1 ++// CHECK_PENTIUMPRO_M32: #define __tune_pentiumpro__ 1 ++// CHECK_PENTIUMPRO_M32: #define i386 1 ++// RUN: not %clang -march=pentiumpro -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUMPRO_M64 ++// CHECK_PENTIUMPRO_M64: error: {{.*}} ++ ++// RUN: %clang -march=pentium2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM2_M32 ++// CHECK_PENTIUM2_M32: #define __FXSR__ 1 ++// CHECK_PENTIUM2_M32: #define __MMX__ 1 ++// CHECK_PENTIUM2_M32: #define __i386 1 ++// CHECK_PENTIUM2_M32: #define __i386__ 1 ++// CHECK_PENTIUM2_M32: #define __i686 1 ++// CHECK_PENTIUM2_M32: #define __i686__ 1 ++// CHECK_PENTIUM2_M32: #define __pentiumpro 1 ++// CHECK_PENTIUM2_M32: #define __pentiumpro__ 1 ++// CHECK_PENTIUM2_M32: #define __tune_i686__ 1 ++// CHECK_PENTIUM2_M32: #define __tune_pentium2__ 1 ++// CHECK_PENTIUM2_M32: #define __tune_pentiumpro__ 1 ++// CHECK_PENTIUM2_M32: #define i386 1 ++// RUN: not %clang -march=pentium2 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM2_M64 ++// CHECK_PENTIUM2_M64: error: {{.*}} ++ ++// RUN: %clang -march=pentium3 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM3_M32 ++// CHECK_PENTIUM3_M32: #define __MMX__ 1 ++// CHECK_PENTIUM3_M32: #define __SSE__ 1 ++// CHECK_PENTIUM3_M32: #define __i386 1 ++// CHECK_PENTIUM3_M32: #define __i386__ 1 ++// CHECK_PENTIUM3_M32: #define __i686 1 ++// CHECK_PENTIUM3_M32: #define __i686__ 1 ++// CHECK_PENTIUM3_M32: #define __pentiumpro 1 ++// CHECK_PENTIUM3_M32: #define __pentiumpro__ 1 ++// CHECK_PENTIUM3_M32: #define __tune_i686__ 1 ++// CHECK_PENTIUM3_M32: #define __tune_pentium2__ 1 ++// CHECK_PENTIUM3_M32: #define __tune_pentium3__ 1 ++// CHECK_PENTIUM3_M32: #define __tune_pentiumpro__ 1 ++// CHECK_PENTIUM3_M32: #define i386 1 ++// RUN: not %clang -march=pentium3 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM3_M64 ++// CHECK_PENTIUM3_M64: error: {{.*}} ++ ++// RUN: 
%clang -march=pentium3m -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM3M_M32 ++// CHECK_PENTIUM3M_M32: #define __MMX__ 1 ++// CHECK_PENTIUM3M_M32: #define __SSE__ 1 ++// CHECK_PENTIUM3M_M32: #define __i386 1 ++// CHECK_PENTIUM3M_M32: #define __i386__ 1 ++// CHECK_PENTIUM3M_M32: #define __i686 1 ++// CHECK_PENTIUM3M_M32: #define __i686__ 1 ++// CHECK_PENTIUM3M_M32: #define __pentiumpro 1 ++// CHECK_PENTIUM3M_M32: #define __pentiumpro__ 1 ++// CHECK_PENTIUM3M_M32: #define __tune_i686__ 1 ++// CHECK_PENTIUM3M_M32: #define __tune_pentiumpro__ 1 ++// CHECK_PENTIUM3M_M32: #define i386 1 ++// RUN: not %clang -march=pentium3m -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM3M_M64 ++// CHECK_PENTIUM3M_M64: error: {{.*}} ++ ++// RUN: %clang -march=pentium-m -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM_M_M32 ++// CHECK_PENTIUM_M_M32: #define __MMX__ 1 ++// CHECK_PENTIUM_M_M32: #define __SSE2__ 1 ++// CHECK_PENTIUM_M_M32: #define __SSE__ 1 ++// CHECK_PENTIUM_M_M32: #define __i386 1 ++// CHECK_PENTIUM_M_M32: #define __i386__ 1 ++// CHECK_PENTIUM_M_M32: #define __i686 1 ++// CHECK_PENTIUM_M_M32: #define __i686__ 1 ++// CHECK_PENTIUM_M_M32: #define __pentiumpro 1 ++// CHECK_PENTIUM_M_M32: #define __pentiumpro__ 1 ++// CHECK_PENTIUM_M_M32: #define __tune_i686__ 1 ++// CHECK_PENTIUM_M_M32: #define __tune_pentiumpro__ 1 ++// CHECK_PENTIUM_M_M32: #define i386 1 ++// RUN: not %clang -march=pentium-m -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM_M_M64 ++// CHECK_PENTIUM_M_M64: error: {{.*}} ++ ++// RUN: %clang -march=pentium4 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM4_M32 ++// CHECK_PENTIUM4_M32: #define __MMX__ 1 ++// CHECK_PENTIUM4_M32: #define __SSE2__ 1 ++// CHECK_PENTIUM4_M32: #define __SSE__ 1 ++// CHECK_PENTIUM4_M32: #define __i386 1 ++// CHECK_PENTIUM4_M32: #define __i386__ 1 ++// CHECK_PENTIUM4_M32: #define __pentium4 1 ++// CHECK_PENTIUM4_M32: #define __pentium4__ 1 ++// CHECK_PENTIUM4_M32: #define __tune_pentium4__ 1 ++// CHECK_PENTIUM4_M32: #define i386 1 ++// RUN: not %clang -march=pentium4 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM4_M64 ++// CHECK_PENTIUM4_M64: error: {{.*}} ++ ++// RUN: %clang -march=pentium4m -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM4M_M32 ++// CHECK_PENTIUM4M_M32: #define __MMX__ 1 ++// CHECK_PENTIUM4M_M32: #define __SSE2__ 1 ++// CHECK_PENTIUM4M_M32: #define __SSE__ 1 ++// CHECK_PENTIUM4M_M32: #define __i386 1 ++// CHECK_PENTIUM4M_M32: #define __i386__ 1 ++// CHECK_PENTIUM4M_M32: #define __pentium4 1 ++// CHECK_PENTIUM4M_M32: #define __pentium4__ 1 ++// CHECK_PENTIUM4M_M32: #define __tune_pentium4__ 1 ++// CHECK_PENTIUM4M_M32: #define i386 1 ++// RUN: not %clang -march=pentium4m -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PENTIUM4M_M64 ++// CHECK_PENTIUM4M_M64: error: {{.*}} ++ ++// RUN: %clang -march=yonah -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target 
i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_YONAH_M32 ++// CHECK_YONAH_M32: #define __MMX__ 1 ++// CHECK_YONAH_M32: #define __SSE2__ 1 ++// CHECK_YONAH_M32: #define __SSE3__ 1 ++// CHECK_YONAH_M32: #define __SSE__ 1 ++// CHECK_YONAH_M32: #define __i386 1 ++// CHECK_YONAH_M32: #define __i386__ 1 ++// CHECK_YONAH_M32: #define __nocona 1 ++// CHECK_YONAH_M32: #define __nocona__ 1 ++// CHECK_YONAH_M32: #define __tune_nocona__ 1 ++// CHECK_YONAH_M32: #define i386 1 ++// RUN: not %clang -march=yonah -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_YONAH_M64 ++// CHECK_YONAH_M64: error: {{.*}} ++ ++// RUN: %clang -march=prescott -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PRESCOTT_M32 ++// CHECK_PRESCOTT_M32: #define __MMX__ 1 ++// CHECK_PRESCOTT_M32: #define __SSE2__ 1 ++// CHECK_PRESCOTT_M32: #define __SSE3__ 1 ++// CHECK_PRESCOTT_M32: #define __SSE__ 1 ++// CHECK_PRESCOTT_M32: #define __i386 1 ++// CHECK_PRESCOTT_M32: #define __i386__ 1 ++// CHECK_PRESCOTT_M32: #define __nocona 1 ++// CHECK_PRESCOTT_M32: #define __nocona__ 1 ++// CHECK_PRESCOTT_M32: #define __tune_nocona__ 1 ++// CHECK_PRESCOTT_M32: #define i386 1 ++// RUN: not %clang -march=prescott -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PRESCOTT_M64 ++// CHECK_PRESCOTT_M64: error: {{.*}} ++ ++// RUN: %clang -march=nocona -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_NOCONA_M32 ++// CHECK_NOCONA_M32: #define __MMX__ 1 ++// CHECK_NOCONA_M32: #define __SSE2__ 1 ++// CHECK_NOCONA_M32: #define __SSE3__ 1 ++// CHECK_NOCONA_M32: #define __SSE__ 1 ++// CHECK_NOCONA_M32: #define __i386 1 ++// CHECK_NOCONA_M32: #define __i386__ 1 ++// CHECK_NOCONA_M32: #define __nocona 1 ++// CHECK_NOCONA_M32: #define __nocona__ 1 ++// CHECK_NOCONA_M32: #define __tune_nocona__ 1 ++// CHECK_NOCONA_M32: #define i386 1 ++// RUN: %clang -march=nocona -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_NOCONA_M64 ++// CHECK_NOCONA_M64: #define __MMX__ 1 ++// CHECK_NOCONA_M64: #define __SSE2_MATH__ 1 ++// CHECK_NOCONA_M64: #define __SSE2__ 1 ++// CHECK_NOCONA_M64: #define __SSE3__ 1 ++// CHECK_NOCONA_M64: #define __SSE_MATH__ 1 ++// CHECK_NOCONA_M64: #define __SSE__ 1 ++// CHECK_NOCONA_M64: #define __amd64 1 ++// CHECK_NOCONA_M64: #define __amd64__ 1 ++// CHECK_NOCONA_M64: #define __nocona 1 ++// CHECK_NOCONA_M64: #define __nocona__ 1 ++// CHECK_NOCONA_M64: #define __tune_nocona__ 1 ++// CHECK_NOCONA_M64: #define __x86_64 1 ++// CHECK_NOCONA_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=core2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CORE2_M32 ++// CHECK_CORE2_M32: #define __MMX__ 1 ++// CHECK_CORE2_M32: #define __SSE2__ 1 ++// CHECK_CORE2_M32: #define __SSE3__ 1 ++// CHECK_CORE2_M32: #define __SSE__ 1 ++// CHECK_CORE2_M32: #define __SSSE3__ 1 ++// CHECK_CORE2_M32: #define __core2 1 ++// CHECK_CORE2_M32: #define __core2__ 1 ++// CHECK_CORE2_M32: #define __i386 1 ++// CHECK_CORE2_M32: #define __i386__ 1 ++// CHECK_CORE2_M32: #define __tune_core2__ 1 ++// CHECK_CORE2_M32: #define i386 1 ++ ++// RUN: %clang -march=core2 -m64 -E -dM %s -o 
- 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CORE2_M64 ++// CHECK_CORE2_M64: #define __MMX__ 1 ++// CHECK_CORE2_M64: #define __SSE2_MATH__ 1 ++// CHECK_CORE2_M64: #define __SSE2__ 1 ++// CHECK_CORE2_M64: #define __SSE3__ 1 ++// CHECK_CORE2_M64: #define __SSE_MATH__ 1 ++// CHECK_CORE2_M64: #define __SSE__ 1 ++// CHECK_CORE2_M64: #define __SSSE3__ 1 ++// CHECK_CORE2_M64: #define __amd64 1 ++// CHECK_CORE2_M64: #define __amd64__ 1 ++// CHECK_CORE2_M64: #define __core2 1 ++// CHECK_CORE2_M64: #define __core2__ 1 ++// CHECK_CORE2_M64: #define __tune_core2__ 1 ++// CHECK_CORE2_M64: #define __x86_64 1 ++// CHECK_CORE2_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=corei7 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_COREI7_M32 ++// CHECK_COREI7_M32: #define __MMX__ 1 ++// CHECK_COREI7_M32: #define __POPCNT__ 1 ++// CHECK_COREI7_M32: #define __SSE2__ 1 ++// CHECK_COREI7_M32: #define __SSE3__ 1 ++// CHECK_COREI7_M32: #define __SSE4_1__ 1 ++// CHECK_COREI7_M32: #define __SSE4_2__ 1 ++// CHECK_COREI7_M32: #define __SSE__ 1 ++// CHECK_COREI7_M32: #define __SSSE3__ 1 ++// CHECK_COREI7_M32: #define __corei7 1 ++// CHECK_COREI7_M32: #define __corei7__ 1 ++// CHECK_COREI7_M32: #define __i386 1 ++// CHECK_COREI7_M32: #define __i386__ 1 ++// CHECK_COREI7_M32: #define __tune_corei7__ 1 ++// CHECK_COREI7_M32: #define i386 1 ++ ++// RUN: %clang -march=corei7 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_COREI7_M64 ++// CHECK_COREI7_M64: #define __MMX__ 1 ++// CHECK_COREI7_M64: #define __POPCNT__ 1 ++// CHECK_COREI7_M64: #define __SSE2_MATH__ 1 ++// CHECK_COREI7_M64: #define __SSE2__ 1 ++// CHECK_COREI7_M64: #define __SSE3__ 1 ++// CHECK_COREI7_M64: #define __SSE4_1__ 1 ++// CHECK_COREI7_M64: #define __SSE4_2__ 1 ++// CHECK_COREI7_M64: #define __SSE_MATH__ 1 ++// CHECK_COREI7_M64: #define __SSE__ 1 ++// CHECK_COREI7_M64: #define __SSSE3__ 1 ++// CHECK_COREI7_M64: #define __amd64 1 ++// CHECK_COREI7_M64: #define __amd64__ 1 ++// CHECK_COREI7_M64: #define __corei7 1 ++// CHECK_COREI7_M64: #define __corei7__ 1 ++// CHECK_COREI7_M64: #define __tune_corei7__ 1 ++// CHECK_COREI7_M64: #define __x86_64 1 ++// CHECK_COREI7_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=corei7-avx -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_COREI7_AVX_M32 ++// CHECK_COREI7_AVX_M32: #define __AVX__ 1 ++// CHECK_COREI7_AVX_M32: #define __MMX__ 1 ++// CHECK_COREI7_AVX_M32: #define __PCLMUL__ 1 ++// CHECK_COREI7_AVX_M32-NOT: __RDRND__ ++// CHECK_COREI7_AVX_M32: #define __POPCNT__ 1 ++// CHECK_COREI7_AVX_M32: #define __SSE2__ 1 ++// CHECK_COREI7_AVX_M32: #define __SSE3__ 1 ++// CHECK_COREI7_AVX_M32: #define __SSE4_1__ 1 ++// CHECK_COREI7_AVX_M32: #define __SSE4_2__ 1 ++// CHECK_COREI7_AVX_M32: #define __SSE__ 1 ++// CHECK_COREI7_AVX_M32: #define __SSSE3__ 1 ++// CHECK_COREI7_AVX_M32: #define __XSAVEOPT__ 1 ++// CHECK_COREI7_AVX_M32: #define __XSAVE__ 1 ++// CHECK_COREI7_AVX_M32: #define __corei7 1 ++// CHECK_COREI7_AVX_M32: #define __corei7__ 1 ++// CHECK_COREI7_AVX_M32: #define __i386 1 ++// CHECK_COREI7_AVX_M32: #define __i386__ 1 ++// CHECK_COREI7_AVX_M32: #define __tune_corei7__ 1 ++// CHECK_COREI7_AVX_M32: #define i386 1 ++ ++// RUN: %clang -march=corei7-avx -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target 
i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_COREI7_AVX_M64 ++// CHECK_COREI7_AVX_M64: #define __AVX__ 1 ++// CHECK_COREI7_AVX_M64: #define __MMX__ 1 ++// CHECK_COREI7_AVX_M64: #define __PCLMUL__ 1 ++// CHECK_COREI7_AVX_M64-NOT: __RDRND__ ++// CHECK_COREI7_AVX_M64: #define __POPCNT__ 1 ++// CHECK_COREI7_AVX_M64: #define __SSE2_MATH__ 1 ++// CHECK_COREI7_AVX_M64: #define __SSE2__ 1 ++// CHECK_COREI7_AVX_M64: #define __SSE3__ 1 ++// CHECK_COREI7_AVX_M64: #define __SSE4_1__ 1 ++// CHECK_COREI7_AVX_M64: #define __SSE4_2__ 1 ++// CHECK_COREI7_AVX_M64: #define __SSE_MATH__ 1 ++// CHECK_COREI7_AVX_M64: #define __SSE__ 1 ++// CHECK_COREI7_AVX_M64: #define __SSSE3__ 1 ++// CHECK_COREI7_AVX_M64: #define __XSAVEOPT__ 1 ++// CHECK_COREI7_AVX_M64: #define __XSAVE__ 1 ++// CHECK_COREI7_AVX_M64: #define __amd64 1 ++// CHECK_COREI7_AVX_M64: #define __amd64__ 1 ++// CHECK_COREI7_AVX_M64: #define __corei7 1 ++// CHECK_COREI7_AVX_M64: #define __corei7__ 1 ++// CHECK_COREI7_AVX_M64: #define __tune_corei7__ 1 ++// CHECK_COREI7_AVX_M64: #define __x86_64 1 ++// CHECK_COREI7_AVX_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=core-avx-i -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CORE_AVX_I_M32 ++// CHECK_CORE_AVX_I_M32: #define __AVX__ 1 ++// CHECK_CORE_AVX_I_M32: #define __F16C__ 1 ++// CHECK_CORE_AVX_I_M32: #define __MMX__ 1 ++// CHECK_CORE_AVX_I_M32: #define __PCLMUL__ 1 ++// CHECK_CORE_AVX_I_M32: #define __RDRND__ 1 ++// CHECK_CORE_AVX_I_M32: #define __SSE2__ 1 ++// CHECK_CORE_AVX_I_M32: #define __SSE3__ 1 ++// CHECK_CORE_AVX_I_M32: #define __SSE4_1__ 1 ++// CHECK_CORE_AVX_I_M32: #define __SSE4_2__ 1 ++// CHECK_CORE_AVX_I_M32: #define __SSE__ 1 ++// CHECK_CORE_AVX_I_M32: #define __SSSE3__ 1 ++// CHECK_CORE_AVX_I_M32: #define __XSAVEOPT__ 1 ++// CHECK_CORE_AVX_I_M32: #define __XSAVE__ 1 ++// CHECK_CORE_AVX_I_M32: #define __corei7 1 ++// CHECK_CORE_AVX_I_M32: #define __corei7__ 1 ++// CHECK_CORE_AVX_I_M32: #define __i386 1 ++// CHECK_CORE_AVX_I_M32: #define __i386__ 1 ++// CHECK_CORE_AVX_I_M32: #define __tune_corei7__ 1 ++// CHECK_CORE_AVX_I_M32: #define i386 1 ++ ++// RUN: %clang -march=core-avx-i -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CORE_AVX_I_M64 ++// CHECK_CORE_AVX_I_M64: #define __AVX__ 1 ++// CHECK_CORE_AVX_I_M64: #define __F16C__ 1 ++// CHECK_CORE_AVX_I_M64: #define __MMX__ 1 ++// CHECK_CORE_AVX_I_M64: #define __PCLMUL__ 1 ++// CHECK_CORE_AVX_I_M64: #define __RDRND__ 1 ++// CHECK_CORE_AVX_I_M64: #define __SSE2_MATH__ 1 ++// CHECK_CORE_AVX_I_M64: #define __SSE2__ 1 ++// CHECK_CORE_AVX_I_M64: #define __SSE3__ 1 ++// CHECK_CORE_AVX_I_M64: #define __SSE4_1__ 1 ++// CHECK_CORE_AVX_I_M64: #define __SSE4_2__ 1 ++// CHECK_CORE_AVX_I_M64: #define __SSE_MATH__ 1 ++// CHECK_CORE_AVX_I_M64: #define __SSE__ 1 ++// CHECK_CORE_AVX_I_M64: #define __SSSE3__ 1 ++// CHECK_CORE_AVX_I_M64: #define __XSAVEOPT__ 1 ++// CHECK_CORE_AVX_I_M64: #define __XSAVE__ 1 ++// CHECK_CORE_AVX_I_M64: #define __amd64 1 ++// CHECK_CORE_AVX_I_M64: #define __amd64__ 1 ++// CHECK_CORE_AVX_I_M64: #define __corei7 1 ++// CHECK_CORE_AVX_I_M64: #define __corei7__ 1 ++// CHECK_CORE_AVX_I_M64: #define __tune_corei7__ 1 ++// CHECK_CORE_AVX_I_M64: #define __x86_64 1 ++// CHECK_CORE_AVX_I_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=core-avx2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck 
-match-full-lines %s -check-prefix=CHECK_CORE_AVX2_M32 ++// CHECK_CORE_AVX2_M32: #define __AVX2__ 1 ++// CHECK_CORE_AVX2_M32: #define __AVX__ 1 ++// CHECK_CORE_AVX2_M32: #define __BMI2__ 1 ++// CHECK_CORE_AVX2_M32: #define __BMI__ 1 ++// CHECK_CORE_AVX2_M32: #define __F16C__ 1 ++// CHECK_CORE_AVX2_M32: #define __FMA__ 1 ++// CHECK_CORE_AVX2_M32: #define __INVPCID__ 1 ++// CHECK_CORE_AVX2_M32: #define __LAHF_SAHF__ 1 ++// CHECK_CORE_AVX2_M32: #define __LZCNT__ 1 ++// CHECK_CORE_AVX2_M32: #define __MMX__ 1 ++// CHECK_CORE_AVX2_M32: #define __MOVBE__ 1 ++// CHECK_CORE_AVX2_M32: #define __PCLMUL__ 1 ++// CHECK_CORE_AVX2_M32: #define __POPCNT__ 1 ++// CHECK_CORE_AVX2_M32: #define __RDRND__ 1 ++// CHECK_CORE_AVX2_M32: #define __SSE2__ 1 ++// CHECK_CORE_AVX2_M32: #define __SSE3__ 1 ++// CHECK_CORE_AVX2_M32: #define __SSE4_1__ 1 ++// CHECK_CORE_AVX2_M32: #define __SSE4_2__ 1 ++// CHECK_CORE_AVX2_M32: #define __SSE__ 1 ++// CHECK_CORE_AVX2_M32: #define __SSSE3__ 1 ++// CHECK_CORE_AVX2_M32: #define __XSAVEOPT__ 1 ++// CHECK_CORE_AVX2_M32: #define __XSAVE__ 1 ++// CHECK_CORE_AVX2_M32: #define __corei7 1 ++// CHECK_CORE_AVX2_M32: #define __corei7__ 1 ++// CHECK_CORE_AVX2_M32: #define __i386 1 ++// CHECK_CORE_AVX2_M32: #define __i386__ 1 ++// CHECK_CORE_AVX2_M32: #define __tune_corei7__ 1 ++// CHECK_CORE_AVX2_M32: #define i386 1 ++ ++// RUN: %clang -march=core-avx2 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CORE_AVX2_M64 ++// CHECK_CORE_AVX2_M64: #define __AVX2__ 1 ++// CHECK_CORE_AVX2_M64: #define __AVX__ 1 ++// CHECK_CORE_AVX2_M64: #define __BMI2__ 1 ++// CHECK_CORE_AVX2_M64: #define __BMI__ 1 ++// CHECK_CORE_AVX2_M64: #define __F16C__ 1 ++// CHECK_CORE_AVX2_M64: #define __FMA__ 1 ++// CHECK_CORE_AVX2_M64: #define __INVPCID__ 1 ++// CHECK_CORE_AVX2_M64: #define __LAHF_SAHF__ 1 ++// CHECK_CORE_AVX2_M64: #define __LZCNT__ 1 ++// CHECK_CORE_AVX2_M64: #define __MMX__ 1 ++// CHECK_CORE_AVX2_M64: #define __MOVBE__ 1 ++// CHECK_CORE_AVX2_M64: #define __PCLMUL__ 1 ++// CHECK_CORE_AVX2_M64: #define __POPCNT__ 1 ++// CHECK_CORE_AVX2_M64: #define __RDRND__ 1 ++// CHECK_CORE_AVX2_M64: #define __SSE2_MATH__ 1 ++// CHECK_CORE_AVX2_M64: #define __SSE2__ 1 ++// CHECK_CORE_AVX2_M64: #define __SSE3__ 1 ++// CHECK_CORE_AVX2_M64: #define __SSE4_1__ 1 ++// CHECK_CORE_AVX2_M64: #define __SSE4_2__ 1 ++// CHECK_CORE_AVX2_M64: #define __SSE_MATH__ 1 ++// CHECK_CORE_AVX2_M64: #define __SSE__ 1 ++// CHECK_CORE_AVX2_M64: #define __SSSE3__ 1 ++// CHECK_CORE_AVX2_M64: #define __XSAVEOPT__ 1 ++// CHECK_CORE_AVX2_M64: #define __XSAVE__ 1 ++// CHECK_CORE_AVX2_M64: #define __amd64 1 ++// CHECK_CORE_AVX2_M64: #define __amd64__ 1 ++// CHECK_CORE_AVX2_M64: #define __corei7 1 ++// CHECK_CORE_AVX2_M64: #define __corei7__ 1 ++// CHECK_CORE_AVX2_M64: #define __tune_corei7__ 1 ++// CHECK_CORE_AVX2_M64: #define __x86_64 1 ++// CHECK_CORE_AVX2_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=broadwell -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BROADWELL_M32 ++// CHECK_BROADWELL_M32: #define __ADX__ 1 ++// CHECK_BROADWELL_M32: #define __AVX2__ 1 ++// CHECK_BROADWELL_M32: #define __AVX__ 1 ++// CHECK_BROADWELL_M32: #define __BMI2__ 1 ++// CHECK_BROADWELL_M32: #define __BMI__ 1 ++// CHECK_BROADWELL_M32: #define __F16C__ 1 ++// CHECK_BROADWELL_M32: #define __FMA__ 1 ++// CHECK_BROADWELL_M32: #define __INVPCID__ 1 ++// CHECK_BROADWELL_M32: #define __LAHF_SAHF__ 1 ++// 
CHECK_BROADWELL_M32: #define __LZCNT__ 1 ++// CHECK_BROADWELL_M32: #define __MMX__ 1 ++// CHECK_BROADWELL_M32: #define __MOVBE__ 1 ++// CHECK_BROADWELL_M32: #define __PCLMUL__ 1 ++// CHECK_BROADWELL_M32: #define __POPCNT__ 1 ++// CHECK_BROADWELL_M32: #define __PRFCHW__ 1 ++// CHECK_BROADWELL_M32: #define __RDRND__ 1 ++// CHECK_BROADWELL_M32: #define __RDSEED__ 1 ++// CHECK_BROADWELL_M32: #define __SSE2__ 1 ++// CHECK_BROADWELL_M32: #define __SSE3__ 1 ++// CHECK_BROADWELL_M32: #define __SSE4_1__ 1 ++// CHECK_BROADWELL_M32: #define __SSE4_2__ 1 ++// CHECK_BROADWELL_M32: #define __SSE__ 1 ++// CHECK_BROADWELL_M32: #define __SSSE3__ 1 ++// CHECK_BROADWELL_M32: #define __XSAVEOPT__ 1 ++// CHECK_BROADWELL_M32: #define __XSAVE__ 1 ++// CHECK_BROADWELL_M32: #define __corei7 1 ++// CHECK_BROADWELL_M32: #define __corei7__ 1 ++// CHECK_BROADWELL_M32: #define __i386 1 ++// CHECK_BROADWELL_M32: #define __i386__ 1 ++// CHECK_BROADWELL_M32: #define __tune_corei7__ 1 ++// CHECK_BROADWELL_M32: #define i386 1 ++ ++// RUN: %clang -march=broadwell -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BROADWELL_M64 ++// CHECK_BROADWELL_M64: #define __ADX__ 1 ++// CHECK_BROADWELL_M64: #define __AVX2__ 1 ++// CHECK_BROADWELL_M64: #define __AVX__ 1 ++// CHECK_BROADWELL_M64: #define __BMI2__ 1 ++// CHECK_BROADWELL_M64: #define __BMI__ 1 ++// CHECK_BROADWELL_M64: #define __F16C__ 1 ++// CHECK_BROADWELL_M64: #define __FMA__ 1 ++// CHECK_BROADWELL_M64: #define __INVPCID__ 1 ++// CHECK_BROADWELL_M64: #define __LAHF_SAHF__ 1 ++// CHECK_BROADWELL_M64: #define __LZCNT__ 1 ++// CHECK_BROADWELL_M64: #define __MMX__ 1 ++// CHECK_BROADWELL_M64: #define __MOVBE__ 1 ++// CHECK_BROADWELL_M64: #define __PCLMUL__ 1 ++// CHECK_BROADWELL_M64: #define __POPCNT__ 1 ++// CHECK_BROADWELL_M64: #define __PRFCHW__ 1 ++// CHECK_BROADWELL_M64: #define __RDRND__ 1 ++// CHECK_BROADWELL_M64: #define __RDSEED__ 1 ++// CHECK_BROADWELL_M64: #define __SSE2_MATH__ 1 ++// CHECK_BROADWELL_M64: #define __SSE2__ 1 ++// CHECK_BROADWELL_M64: #define __SSE3__ 1 ++// CHECK_BROADWELL_M64: #define __SSE4_1__ 1 ++// CHECK_BROADWELL_M64: #define __SSE4_2__ 1 ++// CHECK_BROADWELL_M64: #define __SSE_MATH__ 1 ++// CHECK_BROADWELL_M64: #define __SSE__ 1 ++// CHECK_BROADWELL_M64: #define __SSSE3__ 1 ++// CHECK_BROADWELL_M64: #define __XSAVEOPT__ 1 ++// CHECK_BROADWELL_M64: #define __XSAVE__ 1 ++// CHECK_BROADWELL_M64: #define __amd64 1 ++// CHECK_BROADWELL_M64: #define __amd64__ 1 ++// CHECK_BROADWELL_M64: #define __corei7 1 ++// CHECK_BROADWELL_M64: #define __corei7__ 1 ++// CHECK_BROADWELL_M64: #define __tune_corei7__ 1 ++// CHECK_BROADWELL_M64: #define __x86_64 1 ++// CHECK_BROADWELL_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=skylake -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SKL_M32 ++// CHECK_SKL_M32: #define __ADX__ 1 ++// CHECK_SKL_M32: #define __AES__ 1 ++// CHECK_SKL_M32: #define __AVX2__ 1 ++// CHECK_SKL_M32: #define __AVX__ 1 ++// CHECK_SKL_M32: #define __BMI2__ 1 ++// CHECK_SKL_M32: #define __BMI__ 1 ++// CHECK_SKL_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_SKL_M32: #define __F16C__ 1 ++// CHECK_SKL_M32: #define __FMA__ 1 ++// CHECK_SKL_M32: #define __INVPCID__ 1 ++// CHECK_SKL_M32: #define __LZCNT__ 1 ++// CHECK_SKL_M32: #define __MMX__ 1 ++// CHECK_SKL_M32: #define __MOVBE__ 1 ++// CHECK_SKL_M32: #define __PCLMUL__ 1 ++// CHECK_SKL_M32: #define __POPCNT__ 1 ++// CHECK_SKL_M32: #define 
__PRFCHW__ 1 ++// CHECK_SKL_M32: #define __RDRND__ 1 ++// CHECK_SKL_M32: #define __RDSEED__ 1 ++// CHECK_SKL_M32: #define __SGX__ 1 ++// CHECK_SKL_M32: #define __SSE2__ 1 ++// CHECK_SKL_M32: #define __SSE3__ 1 ++// CHECK_SKL_M32: #define __SSE4_1__ 1 ++// CHECK_SKL_M32: #define __SSE4_2__ 1 ++// CHECK_SKL_M32: #define __SSE__ 1 ++// CHECK_SKL_M32: #define __SSSE3__ 1 ++// CHECK_SKL_M32: #define __XSAVEC__ 1 ++// CHECK_SKL_M32: #define __XSAVEOPT__ 1 ++// CHECK_SKL_M32: #define __XSAVES__ 1 ++// CHECK_SKL_M32: #define __XSAVE__ 1 ++// CHECK_SKL_M32: #define i386 1 ++ ++// RUN: %clang -march=skylake -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SKL_M64 ++// CHECK_SKL_M64: #define __ADX__ 1 ++// CHECK_SKL_M64: #define __AES__ 1 ++// CHECK_SKL_M64: #define __AVX2__ 1 ++// CHECK_SKL_M64: #define __AVX__ 1 ++// CHECK_SKL_M64: #define __BMI2__ 1 ++// CHECK_SKL_M64: #define __BMI__ 1 ++// CHECK_SKL_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_SKL_M64: #define __F16C__ 1 ++// CHECK_SKL_M64: #define __FMA__ 1 ++// CHECK_SKL_M64: #define __INVPCID__ 1 ++// CHECK_SKL_M64: #define __LZCNT__ 1 ++// CHECK_SKL_M64: #define __MMX__ 1 ++// CHECK_SKL_M64: #define __MOVBE__ 1 ++// CHECK_SKL_M64: #define __PCLMUL__ 1 ++// CHECK_SKL_M64: #define __POPCNT__ 1 ++// CHECK_SKL_M64: #define __PRFCHW__ 1 ++// CHECK_SKL_M64: #define __RDRND__ 1 ++// CHECK_SKL_M64: #define __RDSEED__ 1 ++// CHECK_SKL_M64: #define __SGX__ 1 ++// CHECK_SKL_M64: #define __SSE2_MATH__ 1 ++// CHECK_SKL_M64: #define __SSE2__ 1 ++// CHECK_SKL_M64: #define __SSE3__ 1 ++// CHECK_SKL_M64: #define __SSE4_1__ 1 ++// CHECK_SKL_M64: #define __SSE4_2__ 1 ++// CHECK_SKL_M64: #define __SSE_MATH__ 1 ++// CHECK_SKL_M64: #define __SSE__ 1 ++// CHECK_SKL_M64: #define __SSSE3__ 1 ++// CHECK_SKL_M64: #define __XSAVEC__ 1 ++// CHECK_SKL_M64: #define __XSAVEOPT__ 1 ++// CHECK_SKL_M64: #define __XSAVES__ 1 ++// CHECK_SKL_M64: #define __XSAVE__ 1 ++// CHECK_SKL_M64: #define __amd64 1 ++// CHECK_SKL_M64: #define __amd64__ 1 ++// CHECK_SKL_M64: #define __x86_64 1 ++// CHECK_SKL_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=knl -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNL_M32 ++// CHECK_KNL_M32: #define __AES__ 1 ++// CHECK_KNL_M32: #define __AVX2__ 1 ++// CHECK_KNL_M32: #define __AVX512CD__ 1 ++// CHECK_KNL_M32: #define __AVX512ER__ 1 ++// CHECK_KNL_M32: #define __AVX512F__ 1 ++// CHECK_KNL_M32: #define __AVX512PF__ 1 ++// CHECK_KNL_M32: #define __AVX__ 1 ++// CHECK_KNL_M32: #define __BMI2__ 1 ++// CHECK_KNL_M32: #define __BMI__ 1 ++// CHECK_KNL_M32: #define __F16C__ 1 ++// CHECK_KNL_M32: #define __FMA__ 1 ++// CHECK_KNL_M32: #define __LZCNT__ 1 ++// CHECK_KNL_M32: #define __MMX__ 1 ++// CHECK_KNL_M32: #define __MOVBE__ 1 ++// CHECK_KNL_M32: #define __PCLMUL__ 1 ++// CHECK_KNL_M32: #define __POPCNT__ 1 ++// CHECK_KNL_M32: #define __PREFETCHWT1__ 1 ++// CHECK_KNL_M32: #define __PRFCHW__ 1 ++// CHECK_KNL_M32: #define __RDRND__ 1 ++// CHECK_KNL_M32: #define __SSE2__ 1 ++// CHECK_KNL_M32: #define __SSE3__ 1 ++// CHECK_KNL_M32: #define __SSE4_1__ 1 ++// CHECK_KNL_M32: #define __SSE4_2__ 1 ++// CHECK_KNL_M32: #define __SSE__ 1 ++// CHECK_KNL_M32: #define __SSSE3__ 1 ++// CHECK_KNL_M32: #define __XSAVEOPT__ 1 ++// CHECK_KNL_M32: #define __XSAVE__ 1 ++// CHECK_KNL_M32: #define __i386 1 ++// CHECK_KNL_M32: #define __i386__ 1 ++// CHECK_KNL_M32: #define __knl 1 ++// CHECK_KNL_M32: #define __knl__ 1 ++// 
CHECK_KNL_M32: #define __tune_knl__ 1 ++// CHECK_KNL_M32: #define i386 1 ++ ++// RUN: %clang -march=knl -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNL_M64 ++// CHECK_KNL_M64: #define __AES__ 1 ++// CHECK_KNL_M64: #define __AVX2__ 1 ++// CHECK_KNL_M64: #define __AVX512CD__ 1 ++// CHECK_KNL_M64: #define __AVX512ER__ 1 ++// CHECK_KNL_M64: #define __AVX512F__ 1 ++// CHECK_KNL_M64: #define __AVX512PF__ 1 ++// CHECK_KNL_M64: #define __AVX__ 1 ++// CHECK_KNL_M64: #define __BMI2__ 1 ++// CHECK_KNL_M64: #define __BMI__ 1 ++// CHECK_KNL_M64: #define __F16C__ 1 ++// CHECK_KNL_M64: #define __FMA__ 1 ++// CHECK_KNL_M64: #define __LZCNT__ 1 ++// CHECK_KNL_M64: #define __MMX__ 1 ++// CHECK_KNL_M64: #define __MOVBE__ 1 ++// CHECK_KNL_M64: #define __PCLMUL__ 1 ++// CHECK_KNL_M64: #define __POPCNT__ 1 ++// CHECK_KNL_M64: #define __PREFETCHWT1__ 1 ++// CHECK_KNL_M64: #define __PRFCHW__ 1 ++// CHECK_KNL_M64: #define __RDRND__ 1 ++// CHECK_KNL_M64: #define __SSE2_MATH__ 1 ++// CHECK_KNL_M64: #define __SSE2__ 1 ++// CHECK_KNL_M64: #define __SSE3__ 1 ++// CHECK_KNL_M64: #define __SSE4_1__ 1 ++// CHECK_KNL_M64: #define __SSE4_2__ 1 ++// CHECK_KNL_M64: #define __SSE_MATH__ 1 ++// CHECK_KNL_M64: #define __SSE__ 1 ++// CHECK_KNL_M64: #define __SSSE3__ 1 ++// CHECK_KNL_M64: #define __XSAVEOPT__ 1 ++// CHECK_KNL_M64: #define __XSAVE__ 1 ++// CHECK_KNL_M64: #define __amd64 1 ++// CHECK_KNL_M64: #define __amd64__ 1 ++// CHECK_KNL_M64: #define __knl 1 ++// CHECK_KNL_M64: #define __knl__ 1 ++// CHECK_KNL_M64: #define __tune_knl__ 1 ++// CHECK_KNL_M64: #define __x86_64 1 ++// CHECK_KNL_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=knm -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNM_M32 ++// CHECK_KNM_M32: #define __AES__ 1 ++// CHECK_KNM_M32: #define __AVX2__ 1 ++// CHECK_KNM_M32: #define __AVX512CD__ 1 ++// CHECK_KNM_M32: #define __AVX512ER__ 1 ++// CHECK_KNM_M32: #define __AVX512F__ 1 ++// CHECK_KNM_M32: #define __AVX512PF__ 1 ++// CHECK_KNM_M32: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_KNM_M32: #define __AVX__ 1 ++// CHECK_KNM_M32: #define __BMI2__ 1 ++// CHECK_KNM_M32: #define __BMI__ 1 ++// CHECK_KNM_M32: #define __F16C__ 1 ++// CHECK_KNM_M32: #define __FMA__ 1 ++// CHECK_KNM_M32: #define __LZCNT__ 1 ++// CHECK_KNM_M32: #define __MMX__ 1 ++// CHECK_KNM_M32: #define __MOVBE__ 1 ++// CHECK_KNM_M32: #define __PCLMUL__ 1 ++// CHECK_KNM_M32: #define __POPCNT__ 1 ++// CHECK_KNM_M32: #define __PREFETCHWT1__ 1 ++// CHECK_KNM_M32: #define __PRFCHW__ 1 ++// CHECK_KNM_M32: #define __RDRND__ 1 ++// CHECK_KNM_M32: #define __SSE2__ 1 ++// CHECK_KNM_M32: #define __SSE3__ 1 ++// CHECK_KNM_M32: #define __SSE4_1__ 1 ++// CHECK_KNM_M32: #define __SSE4_2__ 1 ++// CHECK_KNM_M32: #define __SSE__ 1 ++// CHECK_KNM_M32: #define __SSSE3__ 1 ++// CHECK_KNM_M32: #define __XSAVEOPT__ 1 ++// CHECK_KNM_M32: #define __XSAVE__ 1 ++// CHECK_KNM_M32: #define __i386 1 ++// CHECK_KNM_M32: #define __i386__ 1 ++// CHECK_KNM_M32: #define i386 1 ++ ++// RUN: %clang -march=knm -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNM_M64 ++// CHECK_KNM_M64: #define __AES__ 1 ++// CHECK_KNM_M64: #define __AVX2__ 1 ++// CHECK_KNM_M64: #define __AVX512CD__ 1 ++// CHECK_KNM_M64: #define __AVX512ER__ 1 ++// CHECK_KNM_M64: #define __AVX512F__ 1 ++// CHECK_KNM_M64: #define __AVX512PF__ 1 ++// CHECK_KNM_M64: #define 
__AVX512VPOPCNTDQ__ 1 ++// CHECK_KNM_M64: #define __AVX__ 1 ++// CHECK_KNM_M64: #define __BMI2__ 1 ++// CHECK_KNM_M64: #define __BMI__ 1 ++// CHECK_KNM_M64: #define __F16C__ 1 ++// CHECK_KNM_M64: #define __FMA__ 1 ++// CHECK_KNM_M64: #define __LZCNT__ 1 ++// CHECK_KNM_M64: #define __MMX__ 1 ++// CHECK_KNM_M64: #define __MOVBE__ 1 ++// CHECK_KNM_M64: #define __PCLMUL__ 1 ++// CHECK_KNM_M64: #define __POPCNT__ 1 ++// CHECK_KNM_M64: #define __PREFETCHWT1__ 1 ++// CHECK_KNM_M64: #define __PRFCHW__ 1 ++// CHECK_KNM_M64: #define __RDRND__ 1 ++// CHECK_KNM_M64: #define __SSE2_MATH__ 1 ++// CHECK_KNM_M64: #define __SSE2__ 1 ++// CHECK_KNM_M64: #define __SSE3__ 1 ++// CHECK_KNM_M64: #define __SSE4_1__ 1 ++// CHECK_KNM_M64: #define __SSE4_2__ 1 ++// CHECK_KNM_M64: #define __SSE_MATH__ 1 ++// CHECK_KNM_M64: #define __SSE__ 1 ++// CHECK_KNM_M64: #define __SSSE3__ 1 ++// CHECK_KNM_M64: #define __XSAVEOPT__ 1 ++// CHECK_KNM_M64: #define __XSAVE__ 1 ++// CHECK_KNM_M64: #define __amd64 1 ++// CHECK_KNM_M64: #define __amd64__ 1 ++// CHECK_KNM_M64: #define __x86_64 1 ++// CHECK_KNM_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=skylake-avx512 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SKX_M32 ++// CHECK_SKX_M32: #define __AES__ 1 ++// CHECK_SKX_M32: #define __AVX2__ 1 ++// CHECK_SKX_M32: #define __AVX512BW__ 1 ++// CHECK_SKX_M32: #define __AVX512CD__ 1 ++// CHECK_SKX_M32: #define __AVX512DQ__ 1 ++// CHECK_SKX_M32: #define __AVX512F__ 1 ++// CHECK_SKX_M32: #define __AVX512VL__ 1 ++// CHECK_SKX_M32: #define __AVX__ 1 ++// CHECK_SKX_M32: #define __BMI2__ 1 ++// CHECK_SKX_M32: #define __BMI__ 1 ++// CHECK_SKX_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_SKX_M32: #define __CLWB__ 1 ++// CHECK_SKX_M32: #define __F16C__ 1 ++// CHECK_SKX_M32: #define __FMA__ 1 ++// CHECK_SKX_M32: #define __INVPCID__ 1 ++// CHECK_SKX_M32: #define __LZCNT__ 1 ++// CHECK_SKX_M32: #define __MMX__ 1 ++// CHECK_SKX_M32: #define __MOVBE__ 1 ++// CHECK_SKX_M32: #define __PCLMUL__ 1 ++// CHECK_SKX_M32: #define __PKU__ 1 ++// CHECK_SKX_M32: #define __POPCNT__ 1 ++// CHECK_SKX_M32: #define __PRFCHW__ 1 ++// CHECK_SKX_M32: #define __RDRND__ 1 ++// CHECK_SKX_M32: #define __RDSEED__ 1 ++// CHECK_SKX_M32-NOT: #define __SGX__ 1 ++// CHECK_SKX_M32: #define __SSE2__ 1 ++// CHECK_SKX_M32: #define __SSE3__ 1 ++// CHECK_SKX_M32: #define __SSE4_1__ 1 ++// CHECK_SKX_M32: #define __SSE4_2__ 1 ++// CHECK_SKX_M32: #define __SSE__ 1 ++// CHECK_SKX_M32: #define __SSSE3__ 1 ++// CHECK_SKX_M32: #define __XSAVEC__ 1 ++// CHECK_SKX_M32: #define __XSAVEOPT__ 1 ++// CHECK_SKX_M32: #define __XSAVES__ 1 ++// CHECK_SKX_M32: #define __XSAVE__ 1 ++// CHECK_SKX_M32: #define __corei7 1 ++// CHECK_SKX_M32: #define __corei7__ 1 ++// CHECK_SKX_M32: #define __i386 1 ++// CHECK_SKX_M32: #define __i386__ 1 ++// CHECK_SKX_M32: #define __tune_corei7__ 1 ++// CHECK_SKX_M32: #define i386 1 ++ ++// RUN: %clang -march=skylake-avx512 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SKX_M64 ++// CHECK_SKX_M64: #define __AES__ 1 ++// CHECK_SKX_M64: #define __AVX2__ 1 ++// CHECK_SKX_M64: #define __AVX512BW__ 1 ++// CHECK_SKX_M64: #define __AVX512CD__ 1 ++// CHECK_SKX_M64: #define __AVX512DQ__ 1 ++// CHECK_SKX_M64: #define __AVX512F__ 1 ++// CHECK_SKX_M64: #define __AVX512VL__ 1 ++// CHECK_SKX_M64: #define __AVX__ 1 ++// CHECK_SKX_M64: #define __BMI2__ 1 ++// CHECK_SKX_M64: #define __BMI__ 1 ++// CHECK_SKX_M64: #define 
__CLFLUSHOPT__ 1 ++// CHECK_SKX_M64: #define __CLWB__ 1 ++// CHECK_SKX_M64: #define __F16C__ 1 ++// CHECK_SKX_M64: #define __FMA__ 1 ++// CHECK_SKX_M64: #define __INVPCID__ 1 ++// CHECK_SKX_M64: #define __LZCNT__ 1 ++// CHECK_SKX_M64: #define __MMX__ 1 ++// CHECK_SKX_M64: #define __MOVBE__ 1 ++// CHECK_SKX_M64: #define __PCLMUL__ 1 ++// CHECK_SKX_M64: #define __PKU__ 1 ++// CHECK_SKX_M64: #define __POPCNT__ 1 ++// CHECK_SKX_M64: #define __PRFCHW__ 1 ++// CHECK_SKX_M64: #define __RDRND__ 1 ++// CHECK_SKX_M64: #define __RDSEED__ 1 ++// CHECK_SKX_M64-NOT: #define __SGX__ 1 ++// CHECK_SKX_M64: #define __SSE2_MATH__ 1 ++// CHECK_SKX_M64: #define __SSE2__ 1 ++// CHECK_SKX_M64: #define __SSE3__ 1 ++// CHECK_SKX_M64: #define __SSE4_1__ 1 ++// CHECK_SKX_M64: #define __SSE4_2__ 1 ++// CHECK_SKX_M64: #define __SSE_MATH__ 1 ++// CHECK_SKX_M64: #define __SSE__ 1 ++// CHECK_SKX_M64: #define __SSSE3__ 1 ++// CHECK_SKX_M64: #define __XSAVEC__ 1 ++// CHECK_SKX_M64: #define __XSAVEOPT__ 1 ++// CHECK_SKX_M64: #define __XSAVES__ 1 ++// CHECK_SKX_M64: #define __XSAVE__ 1 ++// CHECK_SKX_M64: #define __amd64 1 ++// CHECK_SKX_M64: #define __amd64__ 1 ++// CHECK_SKX_M64: #define __corei7 1 ++// CHECK_SKX_M64: #define __corei7__ 1 ++// CHECK_SKX_M64: #define __tune_corei7__ 1 ++// CHECK_SKX_M64: #define __x86_64 1 ++// CHECK_SKX_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=cascadelake -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CLX_M32 ++// CHECK_CLX_M32: #define __AES__ 1 ++// CHECK_CLX_M32: #define __AVX2__ 1 ++// CHECK_CLX_M32: #define __AVX512BW__ 1 ++// CHECK_CLX_M32: #define __AVX512CD__ 1 ++// CHECK_CLX_M32: #define __AVX512DQ__ 1 ++// CHECK_CLX_M32: #define __AVX512F__ 1 ++// CHECK_CLX_M32: #define __AVX512VL__ 1 ++// CHECK_CLX_M32: #define __AVX512VNNI__ 1 ++// CHECK_CLX_M32: #define __AVX__ 1 ++// CHECK_CLX_M32: #define __BMI2__ 1 ++// CHECK_CLX_M32: #define __BMI__ 1 ++// CHECK_CLX_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_CLX_M32: #define __CLWB__ 1 ++// CHECK_CLX_M32: #define __F16C__ 1 ++// CHECK_CLX_M32: #define __FMA__ 1 ++// CHECK_CLX_M32: #define __INVPCID__ 1 ++// CHECK_CLX_M32: #define __LZCNT__ 1 ++// CHECK_CLX_M32: #define __MMX__ 1 ++// CHECK_CLX_M32: #define __MOVBE__ 1 ++// CHECK_CLX_M32: #define __PCLMUL__ 1 ++// CHECK_CLX_M32: #define __PKU__ 1 ++// CHECK_CLX_M32: #define __POPCNT__ 1 ++// CHECK_CLX_M32: #define __PRFCHW__ 1 ++// CHECK_CLX_M32: #define __RDRND__ 1 ++// CHECK_CLX_M32: #define __RDSEED__ 1 ++// CHECK_CLX_M32-NOT: #define __SGX__ 1 ++// CHECK_CLX_M32: #define __SSE2__ 1 ++// CHECK_CLX_M32: #define __SSE3__ 1 ++// CHECK_CLX_M32: #define __SSE4_1__ 1 ++// CHECK_CLX_M32: #define __SSE4_2__ 1 ++// CHECK_CLX_M32: #define __SSE__ 1 ++// CHECK_CLX_M32: #define __SSSE3__ 1 ++// CHECK_CLX_M32: #define __XSAVEC__ 1 ++// CHECK_CLX_M32: #define __XSAVEOPT__ 1 ++// CHECK_CLX_M32: #define __XSAVES__ 1 ++// CHECK_CLX_M32: #define __XSAVE__ 1 ++// CHECK_CLX_M32: #define __corei7 1 ++// CHECK_CLX_M32: #define __corei7__ 1 ++// CHECK_CLX_M32: #define __i386 1 ++// CHECK_CLX_M32: #define __i386__ 1 ++// CHECK_CLX_M32: #define __tune_corei7__ 1 ++// CHECK_CLX_M32: #define i386 1 ++ ++// RUN: %clang -march=cascadelake -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CLX_M64 ++// CHECK_CLX_M64: #define __AES__ 1 ++// CHECK_CLX_M64: #define __AVX2__ 1 ++// CHECK_CLX_M64: #define __AVX512BW__ 1 ++// CHECK_CLX_M64: #define 
__AVX512CD__ 1 ++// CHECK_CLX_M64: #define __AVX512DQ__ 1 ++// CHECK_CLX_M64: #define __AVX512F__ 1 ++// CHECK_CLX_M64: #define __AVX512VL__ 1 ++// CHECK_CLX_M64: #define __AVX512VNNI__ 1 ++// CHECK_CLX_M64: #define __AVX__ 1 ++// CHECK_CLX_M64: #define __BMI2__ 1 ++// CHECK_CLX_M64: #define __BMI__ 1 ++// CHECK_CLX_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_CLX_M64: #define __CLWB__ 1 ++// CHECK_CLX_M64: #define __F16C__ 1 ++// CHECK_CLX_M64: #define __FMA__ 1 ++// CHECK_CLX_M64: #define __INVPCID__ 1 ++// CHECK_CLX_M64: #define __LZCNT__ 1 ++// CHECK_CLX_M64: #define __MMX__ 1 ++// CHECK_CLX_M64: #define __MOVBE__ 1 ++// CHECK_CLX_M64: #define __PCLMUL__ 1 ++// CHECK_CLX_M64: #define __PKU__ 1 ++// CHECK_CLX_M64: #define __POPCNT__ 1 ++// CHECK_CLX_M64: #define __PRFCHW__ 1 ++// CHECK_CLX_M64: #define __RDRND__ 1 ++// CHECK_CLX_M64: #define __RDSEED__ 1 ++// CHECK_CLX_M64-NOT: #define __SGX__ 1 ++// CHECK_CLX_M64: #define __SSE2_MATH__ 1 ++// CHECK_CLX_M64: #define __SSE2__ 1 ++// CHECK_CLX_M64: #define __SSE3__ 1 ++// CHECK_CLX_M64: #define __SSE4_1__ 1 ++// CHECK_CLX_M64: #define __SSE4_2__ 1 ++// CHECK_CLX_M64: #define __SSE_MATH__ 1 ++// CHECK_CLX_M64: #define __SSE__ 1 ++// CHECK_CLX_M64: #define __SSSE3__ 1 ++// CHECK_CLX_M64: #define __XSAVEC__ 1 ++// CHECK_CLX_M64: #define __XSAVEOPT__ 1 ++// CHECK_CLX_M64: #define __XSAVES__ 1 ++// CHECK_CLX_M64: #define __XSAVE__ 1 ++// CHECK_CLX_M64: #define __amd64 1 ++// CHECK_CLX_M64: #define __amd64__ 1 ++// CHECK_CLX_M64: #define __corei7 1 ++// CHECK_CLX_M64: #define __corei7__ 1 ++// CHECK_CLX_M64: #define __tune_corei7__ 1 ++// CHECK_CLX_M64: #define __x86_64 1 ++// CHECK_CLX_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=cooperlake -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CPX_M32 ++// CHECK_CPX_M32: #define __AES__ 1 ++// CHECK_CPX_M32: #define __AVX2__ 1 ++// CHECK_CPX_M32: #define __AVX512BF16__ 1 ++// CHECK_CPX_M32: #define __AVX512BW__ 1 ++// CHECK_CPX_M32: #define __AVX512CD__ 1 ++// CHECK_CPX_M32: #define __AVX512DQ__ 1 ++// CHECK_CPX_M32: #define __AVX512F__ 1 ++// CHECK_CPX_M32: #define __AVX512VL__ 1 ++// CHECK_CPX_M32: #define __AVX512VNNI__ 1 ++// CHECK_CPX_M32: #define __AVX__ 1 ++// CHECK_CPX_M32: #define __BMI2__ 1 ++// CHECK_CPX_M32: #define __BMI__ 1 ++// CHECK_CPX_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_CPX_M32: #define __CLWB__ 1 ++// CHECK_CPX_M32: #define __F16C__ 1 ++// CHECK_CPX_M32: #define __FMA__ 1 ++// CHECK_CPX_M32: #define __INVPCID__ 1 ++// CHECK_CPX_M32: #define __LZCNT__ 1 ++// CHECK_CPX_M32: #define __MMX__ 1 ++// CHECK_CPX_M32: #define __MOVBE__ 1 ++// CHECK_CPX_M32: #define __PCLMUL__ 1 ++// CHECK_CPX_M32: #define __PKU__ 1 ++// CHECK_CPX_M32: #define __POPCNT__ 1 ++// CHECK_CPX_M32: #define __PRFCHW__ 1 ++// CHECK_CPX_M32: #define __RDRND__ 1 ++// CHECK_CPX_M32: #define __RDSEED__ 1 ++// CHECK_CPX_M32-NOT: #define __SGX__ 1 ++// CHECK_CPX_M32: #define __SSE2__ 1 ++// CHECK_CPX_M32: #define __SSE3__ 1 ++// CHECK_CPX_M32: #define __SSE4_1__ 1 ++// CHECK_CPX_M32: #define __SSE4_2__ 1 ++// CHECK_CPX_M32: #define __SSE__ 1 ++// CHECK_CPX_M32: #define __SSSE3__ 1 ++// CHECK_CPX_M32: #define __XSAVEC__ 1 ++// CHECK_CPX_M32: #define __XSAVEOPT__ 1 ++// CHECK_CPX_M32: #define __XSAVES__ 1 ++// CHECK_CPX_M32: #define __XSAVE__ 1 ++// CHECK_CPX_M32: #define __corei7 1 ++// CHECK_CPX_M32: #define __corei7__ 1 ++// CHECK_CPX_M32: #define __i386 1 ++// CHECK_CPX_M32: #define __i386__ 1 ++// CHECK_CPX_M32: #define 
__tune_corei7__ 1 ++// CHECK_CPX_M32: #define i386 1 ++ ++// RUN: %clang -march=cooperlake -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CPX_M64 ++// CHECK_CPX_M64: #define __AES__ 1 ++// CHECK_CPX_M64: #define __AVX2__ 1 ++// CHECK_CPX_M64: #define __AVX512BF16__ 1 ++// CHECK_CPX_M64: #define __AVX512BW__ 1 ++// CHECK_CPX_M64: #define __AVX512CD__ 1 ++// CHECK_CPX_M64: #define __AVX512DQ__ 1 ++// CHECK_CPX_M64: #define __AVX512F__ 1 ++// CHECK_CPX_M64: #define __AVX512VL__ 1 ++// CHECK_CPX_M64: #define __AVX512VNNI__ 1 ++// CHECK_CPX_M64: #define __AVX__ 1 ++// CHECK_CPX_M64: #define __BMI2__ 1 ++// CHECK_CPX_M64: #define __BMI__ 1 ++// CHECK_CPX_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_CPX_M64: #define __CLWB__ 1 ++// CHECK_CPX_M64: #define __F16C__ 1 ++// CHECK_CPX_M64: #define __FMA__ 1 ++// CHECK_CPX_M64: #define __INVPCID__ 1 ++// CHECK_CPX_M64: #define __LZCNT__ 1 ++// CHECK_CPX_M64: #define __MMX__ 1 ++// CHECK_CPX_M64: #define __MOVBE__ 1 ++// CHECK_CPX_M64: #define __PCLMUL__ 1 ++// CHECK_CPX_M64: #define __PKU__ 1 ++// CHECK_CPX_M64: #define __POPCNT__ 1 ++// CHECK_CPX_M64: #define __PRFCHW__ 1 ++// CHECK_CPX_M64: #define __RDRND__ 1 ++// CHECK_CPX_M64: #define __RDSEED__ 1 ++// CHECK_CPX_M64-NOT: #define __SGX__ 1 ++// CHECK_CPX_M64: #define __SSE2_MATH__ 1 ++// CHECK_CPX_M64: #define __SSE2__ 1 ++// CHECK_CPX_M64: #define __SSE3__ 1 ++// CHECK_CPX_M64: #define __SSE4_1__ 1 ++// CHECK_CPX_M64: #define __SSE4_2__ 1 ++// CHECK_CPX_M64: #define __SSE_MATH__ 1 ++// CHECK_CPX_M64: #define __SSE__ 1 ++// CHECK_CPX_M64: #define __SSSE3__ 1 ++// CHECK_CPX_M64: #define __XSAVEC__ 1 ++// CHECK_CPX_M64: #define __XSAVEOPT__ 1 ++// CHECK_CPX_M64: #define __XSAVES__ 1 ++// CHECK_CPX_M64: #define __XSAVE__ 1 ++// CHECK_CPX_M64: #define __amd64 1 ++// CHECK_CPX_M64: #define __amd64__ 1 ++// CHECK_CPX_M64: #define __corei7 1 ++// CHECK_CPX_M64: #define __corei7__ 1 ++// CHECK_CPX_M64: #define __tune_corei7__ 1 ++// CHECK_CPX_M64: #define __x86_64 1 ++// CHECK_CPX_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=cannonlake -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CNL_M32 ++// CHECK_CNL_M32: #define __AES__ 1 ++// CHECK_CNL_M32: #define __AVX2__ 1 ++// CHECK_CNL_M32: #define __AVX512BW__ 1 ++// CHECK_CNL_M32: #define __AVX512CD__ 1 ++// CHECK_CNL_M32: #define __AVX512DQ__ 1 ++// CHECK_CNL_M32: #define __AVX512F__ 1 ++// CHECK_CNL_M32: #define __AVX512IFMA__ 1 ++// CHECK_CNL_M32: #define __AVX512VBMI__ 1 ++// CHECK_CNL_M32: #define __AVX512VL__ 1 ++// CHECK_CNL_M32: #define __AVX__ 1 ++// CHECK_CNL_M32: #define __BMI2__ 1 ++// CHECK_CNL_M32: #define __BMI__ 1 ++// CHECK_CNL_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_CNL_M32-NOT: #define __CLWB__ 1 ++// CHECK_CNL_M32: #define __F16C__ 1 ++// CHECK_CNL_M32: #define __FMA__ 1 ++// CHECK_CNL_M32: #define __INVPCID__ 1 ++// CHECK_CNL_M32: #define __LZCNT__ 1 ++// CHECK_CNL_M32: #define __MMX__ 1 ++// CHECK_CNL_M32: #define __MOVBE__ 1 ++// CHECK_CNL_M32: #define __PCLMUL__ 1 ++// CHECK_CNL_M32: #define __PKU__ 1 ++// CHECK_CNL_M32: #define __POPCNT__ 1 ++// CHECK_CNL_M32: #define __PRFCHW__ 1 ++// CHECK_CNL_M32: #define __RDRND__ 1 ++// CHECK_CNL_M32: #define __RDSEED__ 1 ++// CHECK_CNL_M32: #define __SGX__ 1 ++// CHECK_CNL_M32: #define __SHA__ 1 ++// CHECK_CNL_M32: #define __SSE2__ 1 ++// CHECK_CNL_M32: #define __SSE3__ 1 ++// CHECK_CNL_M32: #define __SSE4_1__ 1 ++// 
CHECK_CNL_M32: #define __SSE4_2__ 1 ++// CHECK_CNL_M32: #define __SSE__ 1 ++// CHECK_CNL_M32: #define __SSSE3__ 1 ++// CHECK_CNL_M32: #define __XSAVEC__ 1 ++// CHECK_CNL_M32: #define __XSAVEOPT__ 1 ++// CHECK_CNL_M32: #define __XSAVES__ 1 ++// CHECK_CNL_M32: #define __XSAVE__ 1 ++// CHECK_CNL_M32: #define __corei7 1 ++// CHECK_CNL_M32: #define __corei7__ 1 ++// CHECK_CNL_M32: #define __i386 1 ++// CHECK_CNL_M32: #define __i386__ 1 ++// CHECK_CNL_M32: #define __tune_corei7__ 1 ++// CHECK_CNL_M32: #define i386 1 ++ ++// RUN: %clang -march=cannonlake -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_CNL_M64 ++// CHECK_CNL_M64: #define __AES__ 1 ++// CHECK_CNL_M64: #define __AVX2__ 1 ++// CHECK_CNL_M64: #define __AVX512BW__ 1 ++// CHECK_CNL_M64: #define __AVX512CD__ 1 ++// CHECK_CNL_M64: #define __AVX512DQ__ 1 ++// CHECK_CNL_M64: #define __AVX512F__ 1 ++// CHECK_CNL_M64: #define __AVX512IFMA__ 1 ++// CHECK_CNL_M64: #define __AVX512VBMI__ 1 ++// CHECK_CNL_M64: #define __AVX512VL__ 1 ++// CHECK_CNL_M64: #define __AVX__ 1 ++// CHECK_CNL_M64: #define __BMI2__ 1 ++// CHECK_CNL_M64: #define __BMI__ 1 ++// CHECK_CNL_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_CNL_M64-NOT: #define __CLWB__ 1 ++// CHECK_CNL_M64: #define __F16C__ 1 ++// CHECK_CNL_M64: #define __FMA__ 1 ++// CHECK_CNL_M64: #define __INVPCID__ 1 ++// CHECK_CNL_M64: #define __LZCNT__ 1 ++// CHECK_CNL_M64: #define __MMX__ 1 ++// CHECK_CNL_M64: #define __MOVBE__ 1 ++// CHECK_CNL_M64: #define __PCLMUL__ 1 ++// CHECK_CNL_M64: #define __PKU__ 1 ++// CHECK_CNL_M64: #define __POPCNT__ 1 ++// CHECK_CNL_M64: #define __PRFCHW__ 1 ++// CHECK_CNL_M64: #define __RDRND__ 1 ++// CHECK_CNL_M64: #define __RDSEED__ 1 ++// CHECK_CNL_M64: #define __SGX__ 1 ++// CHECK_CNL_M64: #define __SHA__ 1 ++// CHECK_CNL_M64: #define __SSE2__ 1 ++// CHECK_CNL_M64: #define __SSE3__ 1 ++// CHECK_CNL_M64: #define __SSE4_1__ 1 ++// CHECK_CNL_M64: #define __SSE4_2__ 1 ++// CHECK_CNL_M64: #define __SSE__ 1 ++// CHECK_CNL_M64: #define __SSSE3__ 1 ++// CHECK_CNL_M64: #define __XSAVEC__ 1 ++// CHECK_CNL_M64: #define __XSAVEOPT__ 1 ++// CHECK_CNL_M64: #define __XSAVES__ 1 ++// CHECK_CNL_M64: #define __XSAVE__ 1 ++// CHECK_CNL_M64: #define __amd64 1 ++// CHECK_CNL_M64: #define __amd64__ 1 ++// CHECK_CNL_M64: #define __corei7 1 ++// CHECK_CNL_M64: #define __corei7__ 1 ++// CHECK_CNL_M64: #define __tune_corei7__ 1 ++// CHECK_CNL_M64: #define __x86_64 1 ++// CHECK_CNL_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=icelake-client -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M32 ++// CHECK_ICL_M32: #define __AES__ 1 ++// CHECK_ICL_M32: #define __AVX2__ 1 ++// CHECK_ICL_M32: #define __AVX512BITALG__ 1 ++// CHECK_ICL_M32: #define __AVX512BW__ 1 ++// CHECK_ICL_M32: #define __AVX512CD__ 1 ++// CHECK_ICL_M32: #define __AVX512DQ__ 1 ++// CHECK_ICL_M32: #define __AVX512F__ 1 ++// CHECK_ICL_M32: #define __AVX512IFMA__ 1 ++// CHECK_ICL_M32: #define __AVX512VBMI2__ 1 ++// CHECK_ICL_M32: #define __AVX512VBMI__ 1 ++// CHECK_ICL_M32: #define __AVX512VL__ 1 ++// CHECK_ICL_M32: #define __AVX512VNNI__ 1 ++// CHECK_ICL_M32: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_ICL_M32: #define __AVX__ 1 ++// CHECK_ICL_M32: #define __BMI2__ 1 ++// CHECK_ICL_M32: #define __BMI__ 1 ++// CHECK_ICL_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_ICL_M32: #define __CLWB__ 1 ++// CHECK_ICL_M32: #define __F16C__ 1 ++// CHECK_ICL_M32: #define __FMA__ 1 ++// 
CHECK_ICL_M32: #define __GFNI__ 1 ++// CHECK_ICL_M32: #define __INVPCID__ 1 ++// CHECK_ICL_M32: #define __LZCNT__ 1 ++// CHECK_ICL_M32: #define __MMX__ 1 ++// CHECK_ICL_M32: #define __MOVBE__ 1 ++// CHECK_ICL_M32: #define __PCLMUL__ 1 ++// CHECK_ICL_M32: #define __PKU__ 1 ++// CHECK_ICL_M32: #define __POPCNT__ 1 ++// CHECK_ICL_M32: #define __PRFCHW__ 1 ++// CHECK_ICL_M32: #define __RDPID__ 1 ++// CHECK_ICL_M32: #define __RDRND__ 1 ++// CHECK_ICL_M32: #define __RDSEED__ 1 ++// CHECK_ICL_M32: #define __SGX__ 1 ++// CHECK_ICL_M32: #define __SHA__ 1 ++// CHECK_ICL_M32: #define __SSE2__ 1 ++// CHECK_ICL_M32: #define __SSE3__ 1 ++// CHECK_ICL_M32: #define __SSE4_1__ 1 ++// CHECK_ICL_M32: #define __SSE4_2__ 1 ++// CHECK_ICL_M32: #define __SSE__ 1 ++// CHECK_ICL_M32: #define __SSSE3__ 1 ++// CHECK_ICL_M32: #define __VAES__ 1 ++// CHECK_ICL_M32: #define __VPCLMULQDQ__ 1 ++// CHECK_ICL_M32-NOT: #define __WBNOINVD__ 1 ++// CHECK_ICL_M32: #define __XSAVEC__ 1 ++// CHECK_ICL_M32: #define __XSAVEOPT__ 1 ++// CHECK_ICL_M32: #define __XSAVES__ 1 ++// CHECK_ICL_M32: #define __XSAVE__ 1 ++// CHECK_ICL_M32: #define __corei7 1 ++// CHECK_ICL_M32: #define __corei7__ 1 ++// CHECK_ICL_M32: #define __i386 1 ++// CHECK_ICL_M32: #define __i386__ 1 ++// CHECK_ICL_M32: #define __tune_corei7__ 1 ++// CHECK_ICL_M32: #define i386 1 ++ ++// RUN: %clang -march=icelake-client -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M64 ++// CHECK_ICL_M64: #define __AES__ 1 ++// CHECK_ICL_M64: #define __AVX2__ 1 ++// CHECK_ICL_M64: #define __AVX512BITALG__ 1 ++// CHECK_ICL_M64: #define __AVX512BW__ 1 ++// CHECK_ICL_M64: #define __AVX512CD__ 1 ++// CHECK_ICL_M64: #define __AVX512DQ__ 1 ++// CHECK_ICL_M64: #define __AVX512F__ 1 ++// CHECK_ICL_M64: #define __AVX512IFMA__ 1 ++// CHECK_ICL_M64: #define __AVX512VBMI2__ 1 ++// CHECK_ICL_M64: #define __AVX512VBMI__ 1 ++// CHECK_ICL_M64: #define __AVX512VL__ 1 ++// CHECK_ICL_M64: #define __AVX512VNNI__ 1 ++// CHECK_ICL_M64: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_ICL_M64: #define __AVX__ 1 ++// CHECK_ICL_M64: #define __BMI2__ 1 ++// CHECK_ICL_M64: #define __BMI__ 1 ++// CHECK_ICL_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_ICL_M64: #define __CLWB__ 1 ++// CHECK_ICL_M64: #define __F16C__ 1 ++// CHECK_ICL_M64: #define __FMA__ 1 ++// CHECK_ICL_M64: #define __GFNI__ 1 ++// CHECK_ICL_M64: #define __INVPCID__ 1 ++// CHECK_ICL_M64: #define __LZCNT__ 1 ++// CHECK_ICL_M64: #define __MMX__ 1 ++// CHECK_ICL_M64: #define __MOVBE__ 1 ++// CHECK_ICL_M64: #define __PCLMUL__ 1 ++// CHECK_ICL_M64: #define __PKU__ 1 ++// CHECK_ICL_M64: #define __POPCNT__ 1 ++// CHECK_ICL_M64: #define __PRFCHW__ 1 ++// CHECK_ICL_M64: #define __RDPID__ 1 ++// CHECK_ICL_M64: #define __RDRND__ 1 ++// CHECK_ICL_M64: #define __RDSEED__ 1 ++// CHECK_ICL_M64: #define __SGX__ 1 ++// CHECK_ICL_M64: #define __SHA__ 1 ++// CHECK_ICL_M64: #define __SSE2__ 1 ++// CHECK_ICL_M64: #define __SSE3__ 1 ++// CHECK_ICL_M64: #define __SSE4_1__ 1 ++// CHECK_ICL_M64: #define __SSE4_2__ 1 ++// CHECK_ICL_M64: #define __SSE__ 1 ++// CHECK_ICL_M64: #define __SSSE3__ 1 ++// CHECK_ICL_M64: #define __VAES__ 1 ++// CHECK_ICL_M64: #define __VPCLMULQDQ__ 1 ++// CHECK_ICL_M64-NOT: #define __WBNOINVD__ 1 ++// CHECK_ICL_M64: #define __XSAVEC__ 1 ++// CHECK_ICL_M64: #define __XSAVEOPT__ 1 ++// CHECK_ICL_M64: #define __XSAVES__ 1 ++// CHECK_ICL_M64: #define __XSAVE__ 1 ++// CHECK_ICL_M64: #define __amd64 1 ++// CHECK_ICL_M64: #define __amd64__ 1 ++// CHECK_ICL_M64: #define __corei7 1 
++// CHECK_ICL_M64: #define __corei7__ 1 ++// CHECK_ICL_M64: #define __tune_corei7__ 1 ++// CHECK_ICL_M64: #define __x86_64 1 ++// CHECK_ICL_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=icelake-server -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICX_M32 ++// CHECK_ICX_M32: #define __AES__ 1 ++// CHECK_ICX_M32: #define __AVX2__ 1 ++// CHECK_ICX_M32: #define __AVX512BITALG__ 1 ++// CHECK_ICX_M32: #define __AVX512BW__ 1 ++// CHECK_ICX_M32: #define __AVX512CD__ 1 ++// CHECK_ICX_M32: #define __AVX512DQ__ 1 ++// CHECK_ICX_M32: #define __AVX512F__ 1 ++// CHECK_ICX_M32: #define __AVX512IFMA__ 1 ++// CHECK_ICX_M32: #define __AVX512VBMI2__ 1 ++// CHECK_ICX_M32: #define __AVX512VBMI__ 1 ++// CHECK_ICX_M32: #define __AVX512VL__ 1 ++// CHECK_ICX_M32: #define __AVX512VNNI__ 1 ++// CHECK_ICX_M32: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_ICX_M32: #define __AVX__ 1 ++// CHECK_ICX_M32: #define __BMI2__ 1 ++// CHECK_ICX_M32: #define __BMI__ 1 ++// CHECK_ICX_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_ICX_M32: #define __CLWB__ 1 ++// CHECK_ICX_M32: #define __F16C__ 1 ++// CHECK_ICX_M32: #define __FMA__ 1 ++// CHECK_ICX_M32: #define __GFNI__ 1 ++// CHECK_ICX_M32: #define __INVPCID__ 1 ++// CHECK_ICX_M32: #define __LZCNT__ 1 ++// CHECK_ICX_M32: #define __MMX__ 1 ++// CHECK_ICX_M32: #define __MOVBE__ 1 ++// CHECK_ICX_M32: #define __PCLMUL__ 1 ++// CHECK_ICX_M32: #define __PCONFIG__ 1 ++// CHECK_ICX_M32: #define __PKU__ 1 ++// CHECK_ICX_M32: #define __POPCNT__ 1 ++// CHECK_ICX_M32: #define __PRFCHW__ 1 ++// CHECK_ICX_M32: #define __RDPID__ 1 ++// CHECK_ICX_M32: #define __RDRND__ 1 ++// CHECK_ICX_M32: #define __RDSEED__ 1 ++// CHECK_ICX_M32: #define __SGX__ 1 ++// CHECK_ICX_M32: #define __SHA__ 1 ++// CHECK_ICX_M32: #define __SSE2__ 1 ++// CHECK_ICX_M32: #define __SSE3__ 1 ++// CHECK_ICX_M32: #define __SSE4_1__ 1 ++// CHECK_ICX_M32: #define __SSE4_2__ 1 ++// CHECK_ICX_M32: #define __SSE__ 1 ++// CHECK_ICX_M32: #define __SSSE3__ 1 ++// CHECK_ICX_M32: #define __VAES__ 1 ++// CHECK_ICX_M32: #define __VPCLMULQDQ__ 1 ++// CHECK_ICX_M32: #define __WBNOINVD__ 1 ++// CHECK_ICX_M32: #define __XSAVEC__ 1 ++// CHECK_ICX_M32: #define __XSAVEOPT__ 1 ++// CHECK_ICX_M32: #define __XSAVES__ 1 ++// CHECK_ICX_M32: #define __XSAVE__ 1 ++// CHECK_ICX_M32: #define __corei7 1 ++// CHECK_ICX_M32: #define __corei7__ 1 ++// CHECK_ICX_M32: #define __i386 1 ++// CHECK_ICX_M32: #define __i386__ 1 ++// CHECK_ICX_M32: #define __tune_corei7__ 1 ++// CHECK_ICX_M32: #define i386 1 ++ ++// RUN: %clang -march=icelake-server -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICX_M64 ++// CHECK_ICX_M64: #define __AES__ 1 ++// CHECK_ICX_M64: #define __AVX2__ 1 ++// CHECK_ICX_M64: #define __AVX512BITALG__ 1 ++// CHECK_ICX_M64: #define __AVX512BW__ 1 ++// CHECK_ICX_M64: #define __AVX512CD__ 1 ++// CHECK_ICX_M64: #define __AVX512DQ__ 1 ++// CHECK_ICX_M64: #define __AVX512F__ 1 ++// CHECK_ICX_M64: #define __AVX512IFMA__ 1 ++// CHECK_ICX_M64: #define __AVX512VBMI2__ 1 ++// CHECK_ICX_M64: #define __AVX512VBMI__ 1 ++// CHECK_ICX_M64: #define __AVX512VL__ 1 ++// CHECK_ICX_M64: #define __AVX512VNNI__ 1 ++// CHECK_ICX_M64: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_ICX_M64: #define __AVX__ 1 ++// CHECK_ICX_M64: #define __BMI2__ 1 ++// CHECK_ICX_M64: #define __BMI__ 1 ++// CHECK_ICX_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_ICX_M64: #define __CLWB__ 1 ++// CHECK_ICX_M64: #define __F16C__ 1 ++// 
CHECK_ICX_M64: #define __FMA__ 1 ++// CHECK_ICX_M64: #define __GFNI__ 1 ++// CHECK_ICX_M64: #define __INVPCID__ 1 ++// CHECK_ICX_M64: #define __LZCNT__ 1 ++// CHECK_ICX_M64: #define __MMX__ 1 ++// CHECK_ICX_M64: #define __MOVBE__ 1 ++// CHECK_ICX_M64: #define __PCLMUL__ 1 ++// CHECK_ICX_M64: #define __PCONFIG__ 1 ++// CHECK_ICX_M64: #define __PKU__ 1 ++// CHECK_ICX_M64: #define __POPCNT__ 1 ++// CHECK_ICX_M64: #define __PRFCHW__ 1 ++// CHECK_ICX_M64: #define __RDPID__ 1 ++// CHECK_ICX_M64: #define __RDRND__ 1 ++// CHECK_ICX_M64: #define __RDSEED__ 1 ++// CHECK_ICX_M64: #define __SGX__ 1 ++// CHECK_ICX_M64: #define __SHA__ 1 ++// CHECK_ICX_M64: #define __SSE2__ 1 ++// CHECK_ICX_M64: #define __SSE3__ 1 ++// CHECK_ICX_M64: #define __SSE4_1__ 1 ++// CHECK_ICX_M64: #define __SSE4_2__ 1 ++// CHECK_ICX_M64: #define __SSE__ 1 ++// CHECK_ICX_M64: #define __SSSE3__ 1 ++// CHECK_ICX_M64: #define __VAES__ 1 ++// CHECK_ICX_M64: #define __VPCLMULQDQ__ 1 ++// CHECK_ICX_M64: #define __WBNOINVD__ 1 ++// CHECK_ICX_M64: #define __XSAVEC__ 1 ++// CHECK_ICX_M64: #define __XSAVEOPT__ 1 ++// CHECK_ICX_M64: #define __XSAVES__ 1 ++// CHECK_ICX_M64: #define __XSAVE__ 1 ++// CHECK_ICX_M64: #define __amd64 1 ++// CHECK_ICX_M64: #define __amd64__ 1 ++// CHECK_ICX_M64: #define __corei7 1 ++// CHECK_ICX_M64: #define __corei7__ 1 ++// CHECK_ICX_M64: #define __tune_corei7__ 1 ++// CHECK_ICX_M64: #define __x86_64 1 ++// CHECK_ICX_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=tigerlake -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_TGL_M32 ++// CHECK_TGL_M32: #define __AES__ 1 ++// CHECK_TGL_M32: #define __AVX2__ 1 ++// CHECK_TGL_M32: #define __AVX512BITALG__ 1 ++// CHECK_TGL_M32: #define __AVX512BW__ 1 ++// CHECK_TGL_M32: #define __AVX512CD__ 1 ++// CHECK_TGL_M32: #define __AVX512DQ__ 1 ++// CHECK_TGL_M32: #define __AVX512F__ 1 ++// CHECK_TGL_M32: #define __AVX512IFMA__ 1 ++// CHECK_TGL_M32: #define __AVX512VBMI2__ 1 ++// CHECK_TGL_M32: #define __AVX512VBMI__ 1 ++// CHECK_TGL_M32: #define __AVX512VL__ 1 ++// CHECK_TGL_M32: #define __AVX512VNNI__ 1 ++// CHECK_TGL_M32: #define __AVX512VP2INTERSECT__ 1 ++// CHECK_TGL_M32: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_TGL_M32: #define __AVX__ 1 ++// CHECK_TGL_M32: #define __BMI2__ 1 ++// CHECK_TGL_M32: #define __BMI__ 1 ++// CHECK_TGL_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_TGL_M32: #define __CLWB__ 1 ++// CHECK_TGL_M32: #define __F16C__ 1 ++// CHECK_TGL_M32: #define __FMA__ 1 ++// CHECK_TGL_M32: #define __GFNI__ 1 ++// CHECK_TGL_M32: #define __INVPCID__ 1 ++// CHECK_TGL_M32: #define __LZCNT__ 1 ++// CHECK_TGL_M32: #define __MMX__ 1 ++// CHECK_TGL_M32: #define __MOVBE__ 1 ++// CHECK_TGL_M32: #define __MOVDIR64B__ 1 ++// CHECK_TGL_M32: #define __MOVDIRI__ 1 ++// CHECK_TGL_M32: #define __PCLMUL__ 1 ++// CHECK_TGL_M32-NOT: #define __PCONFIG__ 1 ++// CHECK_TGL_M32: #define __PKU__ 1 ++// CHECK_TGL_M32: #define __POPCNT__ 1 ++// CHECK_TGL_M32: #define __PRFCHW__ 1 ++// CHECK_TGL_M32: #define __RDPID__ 1 ++// CHECK_TGL_M32: #define __RDRND__ 1 ++// CHECK_TGL_M32: #define __RDSEED__ 1 ++// CHECK_TGL_M32: #define __SGX__ 1 ++// CHECK_TGL_M32: #define __SHA__ 1 ++// CHECK_TGL_M32: #define __SHSTK__ 1 ++// CHECK_TGL_M32: #define __SSE2__ 1 ++// CHECK_TGL_M32: #define __SSE3__ 1 ++// CHECK_TGL_M32: #define __SSE4_1__ 1 ++// CHECK_TGL_M32: #define __SSE4_2__ 1 ++// CHECK_TGL_M32: #define __SSE__ 1 ++// CHECK_TGL_M32: #define __SSSE3__ 1 ++// CHECK_TGL_M32: #define __VAES__ 1 ++// CHECK_TGL_M32: #define 
__VPCLMULQDQ__ 1 ++// CHECK_TGL_M32-NOT: #define __WBNOINVD__ 1 ++// CHECK_TGL_M32: #define __XSAVEC__ 1 ++// CHECK_TGL_M32: #define __XSAVEOPT__ 1 ++// CHECK_TGL_M32: #define __XSAVES__ 1 ++// CHECK_TGL_M32: #define __XSAVE__ 1 ++// CHECK_TGL_M32: #define __corei7 1 ++// CHECK_TGL_M32: #define __corei7__ 1 ++// CHECK_TGL_M32: #define __i386 1 ++// CHECK_TGL_M32: #define __i386__ 1 ++// CHECK_TGL_M32: #define __tune_corei7__ 1 ++// CHECK_TGL_M32: #define i386 1 ++ ++// RUN: %clang -march=tigerlake -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_TGL_M64 ++// CHECK_TGL_M64: #define __AES__ 1 ++// CHECK_TGL_M64: #define __AVX2__ 1 ++// CHECK_TGL_M64: #define __AVX512BITALG__ 1 ++// CHECK_TGL_M64: #define __AVX512BW__ 1 ++// CHECK_TGL_M64: #define __AVX512CD__ 1 ++// CHECK_TGL_M64: #define __AVX512DQ__ 1 ++// CHECK_TGL_M64: #define __AVX512F__ 1 ++// CHECK_TGL_M64: #define __AVX512IFMA__ 1 ++// CHECK_TGL_M64: #define __AVX512VBMI2__ 1 ++// CHECK_TGL_M64: #define __AVX512VBMI__ 1 ++// CHECK_TGL_M64: #define __AVX512VL__ 1 ++// CHECK_TGL_M64: #define __AVX512VNNI__ 1 ++// CHECK_TGL_M64: #define __AVX512VP2INTERSECT__ 1 ++// CHECK_TGL_M64: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_TGL_M64: #define __AVX__ 1 ++// CHECK_TGL_M64: #define __BMI2__ 1 ++// CHECK_TGL_M64: #define __BMI__ 1 ++// CHECK_TGL_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_TGL_M64: #define __CLWB__ 1 ++// CHECK_TGL_M64: #define __F16C__ 1 ++// CHECK_TGL_M64: #define __FMA__ 1 ++// CHECK_TGL_M64: #define __GFNI__ 1 ++// CHECK_TGL_M64: #define __INVPCID__ 1 ++// CHECK_TGL_M64: #define __LZCNT__ 1 ++// CHECK_TGL_M64: #define __MMX__ 1 ++// CHECK_TGL_M64: #define __MOVBE__ 1 ++// CHECK_TGL_M64: #define __MOVDIR64B__ 1 ++// CHECK_TGL_M64: #define __MOVDIRI__ 1 ++// CHECK_TGL_M64: #define __PCLMUL__ 1 ++// CHECK_TGL_M64-NOT: #define __PCONFIG__ 1 ++// CHECK_TGL_M64: #define __PKU__ 1 ++// CHECK_TGL_M64: #define __POPCNT__ 1 ++// CHECK_TGL_M64: #define __PRFCHW__ 1 ++// CHECK_TGL_M64: #define __RDPID__ 1 ++// CHECK_TGL_M64: #define __RDRND__ 1 ++// CHECK_TGL_M64: #define __RDSEED__ 1 ++// CHECK_TGL_M64: #define __SGX__ 1 ++// CHECK_TGL_M64: #define __SHA__ 1 ++// CHECK_TGL_M64: #define __SHSTK__ 1 ++// CHECK_TGL_M64: #define __SSE2__ 1 ++// CHECK_TGL_M64: #define __SSE3__ 1 ++// CHECK_TGL_M64: #define __SSE4_1__ 1 ++// CHECK_TGL_M64: #define __SSE4_2__ 1 ++// CHECK_TGL_M64: #define __SSE__ 1 ++// CHECK_TGL_M64: #define __SSSE3__ 1 ++// CHECK_TGL_M64: #define __VAES__ 1 ++// CHECK_TGL_M64: #define __VPCLMULQDQ__ 1 ++// CHECK_TGL_M64-NOT: #define __WBNOINVD__ 1 ++// CHECK_TGL_M64: #define __XSAVEC__ 1 ++// CHECK_TGL_M64: #define __XSAVEOPT__ 1 ++// CHECK_TGL_M64: #define __XSAVES__ 1 ++// CHECK_TGL_M64: #define __XSAVE__ 1 ++// CHECK_TGL_M64: #define __amd64 1 ++// CHECK_TGL_M64: #define __amd64__ 1 ++// CHECK_TGL_M64: #define __corei7 1 ++// CHECK_TGL_M64: #define __corei7__ 1 ++// CHECK_TGL_M64: #define __tune_corei7__ 1 ++// CHECK_TGL_M64: #define __x86_64 1 ++// CHECK_TGL_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=sapphirerapids -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPR_M32 ++// CHECK_SPR_M32: #define __AES__ 1 ++// CHECK_SPR_M32: #define __AMXBF16__ 1 ++// CHECK_SPR_M32: #define __AMXINT8__ 1 ++// CHECK_SPR_M32: #define __AMXTILE__ 1 ++// CHECK_SPR_M32: #define __AVX2__ 1 ++// CHECK_SPR_M32: #define __AVX512BF16__ 1 ++// CHECK_SPR_M32: #define 
__AVX512BITALG__ 1 ++// CHECK_SPR_M32: #define __AVX512BW__ 1 ++// CHECK_SPR_M32: #define __AVX512CD__ 1 ++// CHECK_SPR_M32: #define __AVX512DQ__ 1 ++// CHECK_SPR_M32: #define __AVX512F__ 1 ++// CHECK_SPR_M32: #define __AVX512IFMA__ 1 ++// CHECK_SPR_M32: #define __AVX512VBMI2__ 1 ++// CHECK_SPR_M32: #define __AVX512VBMI__ 1 ++// CHECK_SPR_M32: #define __AVX512VL__ 1 ++// CHECK_SPR_M32: #define __AVX512VNNI__ 1 ++// CHECK_SPR_M32: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_SPR_M32: #define __AVXVNNI__ 1 ++// CHECK_SPR_M32: #define __AVX__ 1 ++// CHECK_SPR_M32: #define __BMI2__ 1 ++// CHECK_SPR_M32: #define __BMI__ 1 ++// CHECK_SPR_M32: #define __CLDEMOTE__ 1 ++// CHECK_SPR_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_SPR_M32: #define __CLWB__ 1 ++// CHECK_SPR_M32: #define __ENQCMD__ 1 ++// CHECK_SPR_M32: #define __F16C__ 1 ++// CHECK_SPR_M32: #define __FMA__ 1 ++// CHECK_SPR_M32: #define __GFNI__ 1 ++// CHECK_SPR_M32: #define __INVPCID__ 1 ++// CHECK_SPR_M32: #define __LZCNT__ 1 ++// CHECK_SPR_M32: #define __MMX__ 1 ++// CHECK_SPR_M32: #define __MOVBE__ 1 ++// CHECK_SPR_M32: #define __PCLMUL__ 1 ++// CHECK_SPR_M32: #define __PCONFIG__ 1 ++// CHECK_SPR_M32: #define __PKU__ 1 ++// CHECK_SPR_M32: #define __POPCNT__ 1 ++// CHECK_SPR_M32: #define __PRFCHW__ 1 ++// CHECK_SPR_M32: #define __PTWRITE__ 1 ++// CHECK_SPR_M32: #define __RDPID__ 1 ++// CHECK_SPR_M32: #define __RDRND__ 1 ++// CHECK_SPR_M32: #define __RDSEED__ 1 ++// CHECK_SPR_M32: #define __SERIALIZE__ 1 ++// CHECK_SPR_M32: #define __SGX__ 1 ++// CHECK_SPR_M32: #define __SHA__ 1 ++// CHECK_SPR_M32: #define __SHSTK__ 1 ++// CHECK_SPR_M32: #define __SSE2__ 1 ++// CHECK_SPR_M32: #define __SSE3__ 1 ++// CHECK_SPR_M32: #define __SSE4_1__ 1 ++// CHECK_SPR_M32: #define __SSE4_2__ 1 ++// CHECK_SPR_M32: #define __SSE__ 1 ++// CHECK_SPR_M32: #define __SSSE3__ 1 ++// CHECK_SPR_M32: #define __TSXLDTRK__ 1 ++// CHECK_SPR_M32: #define __UINTR__ 1 ++// CHECK_SPR_M32: #define __VAES__ 1 ++// CHECK_SPR_M32: #define __VPCLMULQDQ__ 1 ++// CHECK_SPR_M32: #define __WAITPKG__ 1 ++// CHECK_SPR_M32: #define __WBNOINVD__ 1 ++// CHECK_SPR_M32: #define __XSAVEC__ 1 ++// CHECK_SPR_M32: #define __XSAVEOPT__ 1 ++// CHECK_SPR_M32: #define __XSAVES__ 1 ++// CHECK_SPR_M32: #define __XSAVE__ 1 ++// CHECK_SPR_M32: #define __corei7 1 ++// CHECK_SPR_M32: #define __corei7__ 1 ++// CHECK_SPR_M32: #define __i386 1 ++// CHECK_SPR_M32: #define __i386__ 1 ++// CHECK_SPR_M32: #define __tune_corei7__ 1 ++// CHECK_SPR_M32: #define i386 1 ++ ++// RUN: %clang -march=sapphirerapids -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPR_M64 ++// CHECK_SPR_M64: #define __AES__ 1 ++// CHECK_SPR_M64: #define __AMXBF16__ 1 ++// CHECK_SPR_M64: #define __AMXINT8__ 1 ++// CHECK_SPR_M64: #define __AMXTILE__ 1 ++// CHECK_SPR_M64: #define __AVX2__ 1 ++// CHECK_SPR_M64: #define __AVX512BF16__ 1 ++// CHECK_SPR_M64: #define __AVX512BITALG__ 1 ++// CHECK_SPR_M64: #define __AVX512BW__ 1 ++// CHECK_SPR_M64: #define __AVX512CD__ 1 ++// CHECK_SPR_M64: #define __AVX512DQ__ 1 ++// CHECK_SPR_M64: #define __AVX512F__ 1 ++// CHECK_SPR_M64: #define __AVX512IFMA__ 1 ++// CHECK_SPR_M64: #define __AVX512VBMI2__ 1 ++// CHECK_SPR_M64: #define __AVX512VBMI__ 1 ++// CHECK_SPR_M64: #define __AVX512VL__ 1 ++// CHECK_SPR_M64: #define __AVX512VNNI__ 1 ++// CHECK_SPR_M64: #define __AVX512VPOPCNTDQ__ 1 ++// CHECK_SPR_M64: #define __AVXVNNI__ 1 ++// CHECK_SPR_M64: #define __AVX__ 1 ++// CHECK_SPR_M64: #define __BMI2__ 1 ++// CHECK_SPR_M64: #define 
__BMI__ 1 ++// CHECK_SPR_M64: #define __CLDEMOTE__ 1 ++// CHECK_SPR_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_SPR_M64: #define __CLWB__ 1 ++// CHECK_SPR_M64: #define __ENQCMD__ 1 ++// CHECK_SPR_M64: #define __F16C__ 1 ++// CHECK_SPR_M64: #define __FMA__ 1 ++// CHECK_SPR_M64: #define __GFNI__ 1 ++// CHECK_SPR_M64: #define __INVPCID__ 1 ++// CHECK_SPR_M64: #define __LZCNT__ 1 ++// CHECK_SPR_M64: #define __MMX__ 1 ++// CHECK_SPR_M64: #define __MOVBE__ 1 ++// CHECK_SPR_M64: #define __PCLMUL__ 1 ++// CHECK_SPR_M64: #define __PCONFIG__ 1 ++// CHECK_SPR_M64: #define __PKU__ 1 ++// CHECK_SPR_M64: #define __POPCNT__ 1 ++// CHECK_SPR_M64: #define __PRFCHW__ 1 ++// CHECK_SPR_M64: #define __PTWRITE__ 1 ++// CHECK_SPR_M64: #define __RDPID__ 1 ++// CHECK_SPR_M64: #define __RDRND__ 1 ++// CHECK_SPR_M64: #define __RDSEED__ 1 ++// CHECK_SPR_M64: #define __SERIALIZE__ 1 ++// CHECK_SPR_M64: #define __SGX__ 1 ++// CHECK_SPR_M64: #define __SHA__ 1 ++// CHECK_SPR_M64: #define __SHSTK__ 1 ++// CHECK_SPR_M64: #define __SSE2__ 1 ++// CHECK_SPR_M64: #define __SSE3__ 1 ++// CHECK_SPR_M64: #define __SSE4_1__ 1 ++// CHECK_SPR_M64: #define __SSE4_2__ 1 ++// CHECK_SPR_M64: #define __SSE__ 1 ++// CHECK_SPR_M64: #define __SSSE3__ 1 ++// CHECK_SPR_M64: #define __TSXLDTRK__ 1 ++// CHECK_SPR_M64: #define __UINTR__ 1 ++// CHECK_SPR_M64: #define __VAES__ 1 ++// CHECK_SPR_M64: #define __VPCLMULQDQ__ 1 ++// CHECK_SPR_M64: #define __WAITPKG__ 1 ++// CHECK_SPR_M64: #define __WBNOINVD__ 1 ++// CHECK_SPR_M64: #define __XSAVEC__ 1 ++// CHECK_SPR_M64: #define __XSAVEOPT__ 1 ++// CHECK_SPR_M64: #define __XSAVES__ 1 ++// CHECK_SPR_M64: #define __XSAVE__ 1 ++// CHECK_SPR_M64: #define __amd64 1 ++// CHECK_SPR_M64: #define __amd64__ 1 ++// CHECK_SPR_M64: #define __corei7 1 ++// CHECK_SPR_M64: #define __corei7__ 1 ++// CHECK_SPR_M64: #define __tune_corei7__ 1 ++// CHECK_SPR_M64: #define __x86_64 1 ++// CHECK_SPR_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=alderlake -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32 ++// CHECK_ADL_M32: #define __ADX__ 1 ++// CHECK_ADL_M32: #define __AES__ 1 ++// CHECK_ADL_M32: #define __AVX2__ 1 ++// CHECK_ADL_M32-NOT: AVX512 ++// CHECK_ADL_M32: #define __AVXVNNI__ 1 ++// CHECK_ADL_M32: #define __AVX__ 1 ++// CHECK_ADL_M32: #define __BMI2__ 1 ++// CHECK_ADL_M32: #define __BMI__ 1 ++// CHECK_ADL_M32: #define __CLDEMOTE__ 1 ++// CHECK_ADL_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_ADL_M32: #define __F16C__ 1 ++// CHECK_ADL_M32: #define __FMA__ 1 ++// CHECK_ADL_M32: #define __HRESET__ 1 ++// CHECK_ADL_M32: #define __INVPCID__ 1 ++// CHECK_ADL_M32: #define __LZCNT__ 1 ++// CHECK_ADL_M32: #define __MMX__ 1 ++// CHECK_ADL_M32: #define __MOVBE__ 1 ++// CHECK_ADL_M32: #define __PCLMUL__ 1 ++// CHECK_ADL_M32: #define __POPCNT__ 1 ++// CHECK_ADL_M32: #define __PRFCHW__ 1 ++// CHECK_ADL_M32: #define __PTWRITE__ 1 ++// CHECK_ADL_M32: #define __RDRND__ 1 ++// CHECK_ADL_M32: #define __RDSEED__ 1 ++// CHECK_ADL_M32: #define __SERIALIZE__ 1 ++// CHECK_ADL_M32: #define __SGX__ 1 ++// CHECK_ADL_M32: #define __SSE2__ 1 ++// CHECK_ADL_M32: #define __SSE3__ 1 ++// CHECK_ADL_M32: #define __SSE4_1__ 1 ++// CHECK_ADL_M32: #define __SSE4_2__ 1 ++// CHECK_ADL_M32: #define __SSE__ 1 ++// CHECK_ADL_M32: #define __SSSE3__ 1 ++// CHECK_ADL_M32: #define __WAITPKG__ 1 ++// CHECK_ADL_M32: #define __XSAVEC__ 1 ++// CHECK_ADL_M32: #define __XSAVEOPT__ 1 ++// CHECK_ADL_M32: #define __XSAVES__ 1 ++// CHECK_ADL_M32: #define __XSAVE__ 1 ++// CHECK_ADL_M32: 
#define i386 1 ++ ++// RUN: %clang -march=alderlake -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64 ++// CHECK_ADL_M64: #define __ADX__ 1 ++// CHECK_ADL_M64: #define __AES__ 1 ++// CHECK_ADL_M64: #define __AVX2__ 1 ++// CHECK_ADL_M64-NOT: AVX512 ++// CHECK_ADL_M64: #define __AVXVNNI__ 1 ++// CHECK_ADL_M64: #define __AVX__ 1 ++// CHECK_ADL_M64: #define __BMI2__ 1 ++// CHECK_ADL_M64: #define __BMI__ 1 ++// CHECK_ADL_M64: #define __CLDEMOTE__ 1 ++// CHECK_ADL_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_ADL_M64: #define __F16C__ 1 ++// CHECK_ADL_M64: #define __FMA__ 1 ++// CHECK_ADL_M64: #define __HRESET__ 1 ++// CHECK_ADL_M64: #define __INVPCID__ 1 ++// CHECK_ADL_M64: #define __LZCNT__ 1 ++// CHECK_ADL_M64: #define __MMX__ 1 ++// CHECK_ADL_M64: #define __MOVBE__ 1 ++// CHECK_ADL_M64: #define __PCLMUL__ 1 ++// CHECK_ADL_M64: #define __POPCNT__ 1 ++// CHECK_ADL_M64: #define __PRFCHW__ 1 ++// CHECK_ADL_M64: #define __PTWRITE__ 1 ++// CHECK_ADL_M64: #define __RDRND__ 1 ++// CHECK_ADL_M64: #define __RDSEED__ 1 ++// CHECK_ADL_M64: #define __SERIALIZE__ 1 ++// CHECK_ADL_M64: #define __SGX__ 1 ++// CHECK_ADL_M64: #define __SSE2_MATH__ 1 ++// CHECK_ADL_M64: #define __SSE2__ 1 ++// CHECK_ADL_M64: #define __SSE3__ 1 ++// CHECK_ADL_M64: #define __SSE4_1__ 1 ++// CHECK_ADL_M64: #define __SSE4_2__ 1 ++// CHECK_ADL_M64: #define __SSE_MATH__ 1 ++// CHECK_ADL_M64: #define __SSE__ 1 ++// CHECK_ADL_M64: #define __SSSE3__ 1 ++// CHECK_ADL_M64: #define __WAITPKG__ 1 ++// CHECK_ADL_M64: #define __XSAVEC__ 1 ++// CHECK_ADL_M64: #define __XSAVEOPT__ 1 ++// CHECK_ADL_M64: #define __XSAVES__ 1 ++// CHECK_ADL_M64: #define __XSAVE__ 1 ++// CHECK_ADL_M64: #define __amd64 1 ++// CHECK_ADL_M64: #define __amd64__ 1 ++// CHECK_ADL_M64: #define __x86_64 1 ++// CHECK_ADL_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=atom -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATOM_M32 ++// CHECK_ATOM_M32: #define __MMX__ 1 ++// CHECK_ATOM_M32: #define __MOVBE__ 1 ++// CHECK_ATOM_M32: #define __SSE2__ 1 ++// CHECK_ATOM_M32: #define __SSE3__ 1 ++// CHECK_ATOM_M32: #define __SSE__ 1 ++// CHECK_ATOM_M32: #define __SSSE3__ 1 ++// CHECK_ATOM_M32: #define __atom 1 ++// CHECK_ATOM_M32: #define __atom__ 1 ++// CHECK_ATOM_M32: #define __i386 1 ++// CHECK_ATOM_M32: #define __i386__ 1 ++// CHECK_ATOM_M32: #define __tune_atom__ 1 ++// CHECK_ATOM_M32: #define i386 1 ++ ++// RUN: %clang -march=atom -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATOM_M64 ++// CHECK_ATOM_M64: #define __MMX__ 1 ++// CHECK_ATOM_M64: #define __MOVBE__ 1 ++// CHECK_ATOM_M64: #define __SSE2_MATH__ 1 ++// CHECK_ATOM_M64: #define __SSE2__ 1 ++// CHECK_ATOM_M64: #define __SSE3__ 1 ++// CHECK_ATOM_M64: #define __SSE_MATH__ 1 ++// CHECK_ATOM_M64: #define __SSE__ 1 ++// CHECK_ATOM_M64: #define __SSSE3__ 1 ++// CHECK_ATOM_M64: #define __amd64 1 ++// CHECK_ATOM_M64: #define __amd64__ 1 ++// CHECK_ATOM_M64: #define __atom 1 ++// CHECK_ATOM_M64: #define __atom__ 1 ++// CHECK_ATOM_M64: #define __tune_atom__ 1 ++// CHECK_ATOM_M64: #define __x86_64 1 ++// CHECK_ATOM_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=goldmont -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck %s -check-prefix=CHECK_GLM_M32 ++// CHECK_GLM_M32: #define __AES__ 1 ++// CHECK_GLM_M32: #define __CLFLUSHOPT__ 1 ++// 
CHECK_GLM_M32: #define __FSGSBASE__ 1 ++// CHECK_GLM_M32: #define __FXSR__ 1 ++// CHECK_GLM_M32: #define __MMX__ 1 ++// CHECK_GLM_M32: #define __MOVBE__ 1 ++// CHECK_GLM_M32: #define __PCLMUL__ 1 ++// CHECK_GLM_M32: #define __POPCNT__ 1 ++// CHECK_GLM_M32: #define __PRFCHW__ 1 ++// CHECK_GLM_M32: #define __RDRND__ 1 ++// CHECK_GLM_M32: #define __RDSEED__ 1 ++// CHECK_GLM_M32: #define __SHA__ 1 ++// CHECK_GLM_M32: #define __SSE2__ 1 ++// CHECK_GLM_M32: #define __SSE3__ 1 ++// CHECK_GLM_M32: #define __SSE4_1__ 1 ++// CHECK_GLM_M32: #define __SSE4_2__ 1 ++// CHECK_GLM_M32: #define __SSE_MATH__ 1 ++// CHECK_GLM_M32: #define __SSE__ 1 ++// CHECK_GLM_M32: #define __SSSE3__ 1 ++// CHECK_GLM_M32: #define __XSAVEC__ 1 ++// CHECK_GLM_M32: #define __XSAVEOPT__ 1 ++// CHECK_GLM_M32: #define __XSAVES__ 1 ++// CHECK_GLM_M32: #define __XSAVE__ 1 ++// CHECK_GLM_M32: #define __goldmont 1 ++// CHECK_GLM_M32: #define __goldmont__ 1 ++// CHECK_GLM_M32: #define __i386 1 ++// CHECK_GLM_M32: #define __i386__ 1 ++// CHECK_GLM_M32: #define __tune_goldmont__ 1 ++// CHECK_GLM_M32: #define i386 1 ++ ++// RUN: %clang -march=goldmont -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck %s -check-prefix=CHECK_GLM_M64 ++// CHECK_GLM_M64: #define __AES__ 1 ++// CHECK_GLM_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_GLM_M64: #define __FSGSBASE__ 1 ++// CHECK_GLM_M64: #define __FXSR__ 1 ++// CHECK_GLM_M64: #define __MMX__ 1 ++// CHECK_GLM_M64: #define __MOVBE__ 1 ++// CHECK_GLM_M64: #define __PCLMUL__ 1 ++// CHECK_GLM_M64: #define __POPCNT__ 1 ++// CHECK_GLM_M64: #define __PRFCHW__ 1 ++// CHECK_GLM_M64: #define __RDRND__ 1 ++// CHECK_GLM_M64: #define __RDSEED__ 1 ++// CHECK_GLM_M64: #define __SHA__ 1 ++// CHECK_GLM_M64: #define __SSE2__ 1 ++// CHECK_GLM_M64: #define __SSE3__ 1 ++// CHECK_GLM_M64: #define __SSE4_1__ 1 ++// CHECK_GLM_M64: #define __SSE4_2__ 1 ++// CHECK_GLM_M64: #define __SSE__ 1 ++// CHECK_GLM_M64: #define __SSSE3__ 1 ++// CHECK_GLM_M64: #define __XSAVEC__ 1 ++// CHECK_GLM_M64: #define __XSAVEOPT__ 1 ++// CHECK_GLM_M64: #define __XSAVES__ 1 ++// CHECK_GLM_M64: #define __XSAVE__ 1 ++// CHECK_GLM_M64: #define __goldmont 1 ++// CHECK_GLM_M64: #define __goldmont__ 1 ++// CHECK_GLM_M64: #define __tune_goldmont__ 1 ++// CHECK_GLM_M64: #define __x86_64 1 ++// CHECK_GLM_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=goldmont-plus -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck %s -check-prefix=CHECK_GLMP_M32 ++// CHECK_GLMP_M32: #define __AES__ 1 ++// CHECK_GLMP_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_GLMP_M32: #define __FSGSBASE__ 1 ++// CHECK_GLMP_M32: #define __FXSR__ 1 ++// CHECK_GLMP_M32: #define __MMX__ 1 ++// CHECK_GLMP_M32: #define __MOVBE__ 1 ++// CHECK_GLMP_M32: #define __PCLMUL__ 1 ++// CHECK_GLMP_M32: #define __POPCNT__ 1 ++// CHECK_GLMP_M32: #define __PRFCHW__ 1 ++// CHECK_GLMP_M32: #define __PTWRITE__ 1 ++// CHECK_GLMP_M32: #define __RDPID__ 1 ++// CHECK_GLMP_M32: #define __RDRND__ 1 ++// CHECK_GLMP_M32: #define __RDSEED__ 1 ++// CHECK_GLMP_M32: #define __SGX__ 1 ++// CHECK_GLMP_M32: #define __SHA__ 1 ++// CHECK_GLMP_M32: #define __SSE2__ 1 ++// CHECK_GLMP_M32: #define __SSE3__ 1 ++// CHECK_GLMP_M32: #define __SSE4_1__ 1 ++// CHECK_GLMP_M32: #define __SSE4_2__ 1 ++// CHECK_GLMP_M32: #define __SSE_MATH__ 1 ++// CHECK_GLMP_M32: #define __SSE__ 1 ++// CHECK_GLMP_M32: #define __SSSE3__ 1 ++// CHECK_GLMP_M32: #define __XSAVEC__ 1 ++// CHECK_GLMP_M32: #define __XSAVEOPT__ 1 ++// CHECK_GLMP_M32: #define __XSAVES__ 1 ++// 
CHECK_GLMP_M32: #define __XSAVE__ 1 ++// CHECK_GLMP_M32: #define __goldmont_plus 1 ++// CHECK_GLMP_M32: #define __goldmont_plus__ 1 ++// CHECK_GLMP_M32: #define __i386 1 ++// CHECK_GLMP_M32: #define __i386__ 1 ++// CHECK_GLMP_M32: #define __tune_goldmont_plus__ 1 ++// CHECK_GLMP_M32: #define i386 1 ++ ++// RUN: %clang -march=goldmont-plus -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck %s -check-prefix=CHECK_GLMP_M64 ++// CHECK_GLMP_M64: #define __AES__ 1 ++// CHECK_GLMP_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_GLMP_M64: #define __FSGSBASE__ 1 ++// CHECK_GLMP_M64: #define __FXSR__ 1 ++// CHECK_GLMP_M64: #define __MMX__ 1 ++// CHECK_GLMP_M64: #define __MOVBE__ 1 ++// CHECK_GLMP_M64: #define __PCLMUL__ 1 ++// CHECK_GLMP_M64: #define __POPCNT__ 1 ++// CHECK_GLMP_M64: #define __PRFCHW__ 1 ++// CHECK_GLMP_M64: #define __PTWRITE__ 1 ++// CHECK_GLMP_M64: #define __RDPID__ 1 ++// CHECK_GLMP_M64: #define __RDRND__ 1 ++// CHECK_GLMP_M64: #define __RDSEED__ 1 ++// CHECK_GLMP_M64: #define __SGX__ 1 ++// CHECK_GLMP_M64: #define __SHA__ 1 ++// CHECK_GLMP_M64: #define __SSE2__ 1 ++// CHECK_GLMP_M64: #define __SSE3__ 1 ++// CHECK_GLMP_M64: #define __SSE4_1__ 1 ++// CHECK_GLMP_M64: #define __SSE4_2__ 1 ++// CHECK_GLMP_M64: #define __SSE__ 1 ++// CHECK_GLMP_M64: #define __SSSE3__ 1 ++// CHECK_GLMP_M64: #define __XSAVEC__ 1 ++// CHECK_GLMP_M64: #define __XSAVEOPT__ 1 ++// CHECK_GLMP_M64: #define __XSAVES__ 1 ++// CHECK_GLMP_M64: #define __XSAVE__ 1 ++// CHECK_GLMP_M64: #define __goldmont_plus 1 ++// CHECK_GLMP_M64: #define __goldmont_plus__ 1 ++// CHECK_GLMP_M64: #define __tune_goldmont_plus__ 1 ++// CHECK_GLMP_M64: #define __x86_64 1 ++// CHECK_GLMP_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=tremont -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck %s -check-prefix=CHECK_TRM_M32 ++// CHECK_TRM_M32: #define __AES__ 1 ++// CHECK_TRM_M32-NOT: #define __CLDEMOTE__ 1 ++// CHECK_TRM_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_TRM_M32: #define __CLWB__ 1 ++// CHECK_TRM_M32: #define __FSGSBASE__ 1 ++// CHECK_TRM_M32: #define __FXSR__ 1 ++// CHECK_TRM_M32: #define __GFNI__ 1 ++// CHECK_TRM_M32: #define __MMX__ 1 ++// CHECK_TRM_M32: #define __MOVBE__ 1 ++// CHECK_TRM_M32-NOT: #define __MOVDIR64B__ 1 ++// CHECK_TRM_M32-NOT: #define __MOVDIRI__ 1 ++// CHECK_TRM_M32: #define __PCLMUL__ 1 ++// CHECK_TRM_M32: #define __POPCNT__ 1 ++// CHECK_TRM_M32: #define __PRFCHW__ 1 ++// CHECK_TRM_M32: #define __PTWRITE__ 1 ++// CHECK_TRM_M32: #define __RDPID__ 1 ++// CHECK_TRM_M32: #define __RDRND__ 1 ++// CHECK_TRM_M32: #define __RDSEED__ 1 ++// CHECK_TRM_M32: #define __SGX__ 1 ++// CHECK_TRM_M32: #define __SHA__ 1 ++// CHECK_TRM_M32: #define __SSE2__ 1 ++// CHECK_TRM_M32: #define __SSE3__ 1 ++// CHECK_TRM_M32: #define __SSE4_1__ 1 ++// CHECK_TRM_M32: #define __SSE4_2__ 1 ++// CHECK_TRM_M32: #define __SSE_MATH__ 1 ++// CHECK_TRM_M32: #define __SSE__ 1 ++// CHECK_TRM_M32: #define __SSSE3__ 1 ++// CHECK_TRM_M32-NOT: #define __WAITPKG__ 1 ++// CHECK_TRM_M32: #define __XSAVEC__ 1 ++// CHECK_TRM_M32: #define __XSAVEOPT__ 1 ++// CHECK_TRM_M32: #define __XSAVES__ 1 ++// CHECK_TRM_M32: #define __XSAVE__ 1 ++// CHECK_TRM_M32: #define __i386 1 ++// CHECK_TRM_M32: #define __i386__ 1 ++// CHECK_TRM_M32: #define __tremont 1 ++// CHECK_TRM_M32: #define __tremont__ 1 ++// CHECK_TRM_M32: #define __tune_tremont__ 1 ++// CHECK_TRM_M32: #define i386 1 ++ ++// RUN: %clang -march=tremont -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | 
FileCheck %s -check-prefix=CHECK_TRM_M64 ++// CHECK_TRM_M64: #define __AES__ 1 ++// CHECK_TRM_M64-NOT: #define __CLDEMOTE__ 1 ++// CHECK_TRM_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_TRM_M64: #define __CLWB__ 1 ++// CHECK_TRM_M64: #define __FSGSBASE__ 1 ++// CHECK_TRM_M64: #define __FXSR__ 1 ++// CHECK_TRM_M64: #define __GFNI__ 1 ++// CHECK_TRM_M64: #define __MMX__ 1 ++// CHECK_TRM_M64: #define __MOVBE__ 1 ++// CHECK_TRM_M64-NOT: #define __MOVDIR64B__ 1 ++// CHECK_TRM_M64-NOT: #define __MOVDIRI__ 1 ++// CHECK_TRM_M64: #define __PCLMUL__ 1 ++// CHECK_TRM_M64: #define __POPCNT__ 1 ++// CHECK_TRM_M64: #define __PRFCHW__ 1 ++// CHECK_TRM_M64: #define __PTWRITE__ 1 ++// CHECK_TRM_M64: #define __RDPID__ 1 ++// CHECK_TRM_M64: #define __RDRND__ 1 ++// CHECK_TRM_M64: #define __RDSEED__ 1 ++// CHECK_TRM_M64: #define __SGX__ 1 ++// CHECK_TRM_M64: #define __SHA__ 1 ++// CHECK_TRM_M64: #define __SSE2__ 1 ++// CHECK_TRM_M64: #define __SSE3__ 1 ++// CHECK_TRM_M64: #define __SSE4_1__ 1 ++// CHECK_TRM_M64: #define __SSE4_2__ 1 ++// CHECK_TRM_M64: #define __SSE__ 1 ++// CHECK_TRM_M64: #define __SSSE3__ 1 ++// CHECK_TRM_M64-NOT: #define __WAITPKG__ 1 ++// CHECK_TRM_M64: #define __XSAVEC__ 1 ++// CHECK_TRM_M64: #define __XSAVEOPT__ 1 ++// CHECK_TRM_M64: #define __XSAVES__ 1 ++// CHECK_TRM_M64: #define __XSAVE__ 1 ++// CHECK_TRM_M64: #define __tremont 1 ++// CHECK_TRM_M64: #define __tremont__ 1 ++// CHECK_TRM_M64: #define __tune_tremont__ 1 ++// CHECK_TRM_M64: #define __x86_64 1 ++// CHECK_TRM_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=slm -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M32 ++// CHECK_SLM_M32: #define __FXSR__ 1 ++// CHECK_SLM_M32: #define __MMX__ 1 ++// CHECK_SLM_M32: #define __MOVBE__ 1 ++// CHECK_SLM_M32: #define __PCLMUL__ 1 ++// CHECK_SLM_M32: #define __POPCNT__ 1 ++// CHECK_SLM_M32: #define __PRFCHW__ 1 ++// CHECK_SLM_M32: #define __RDRND__ 1 ++// CHECK_SLM_M32: #define __SSE2__ 1 ++// CHECK_SLM_M32: #define __SSE3__ 1 ++// CHECK_SLM_M32: #define __SSE4_1__ 1 ++// CHECK_SLM_M32: #define __SSE4_2__ 1 ++// CHECK_SLM_M32: #define __SSE__ 1 ++// CHECK_SLM_M32: #define __SSSE3__ 1 ++// CHECK_SLM_M32: #define __i386 1 ++// CHECK_SLM_M32: #define __i386__ 1 ++// CHECK_SLM_M32: #define __slm 1 ++// CHECK_SLM_M32: #define __slm__ 1 ++// CHECK_SLM_M32: #define __tune_slm__ 1 ++// CHECK_SLM_M32: #define i386 1 ++ ++// RUN: %clang -march=slm -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M64 ++// CHECK_SLM_M64: #define __FXSR__ 1 ++// CHECK_SLM_M64: #define __MMX__ 1 ++// CHECK_SLM_M64: #define __MOVBE__ 1 ++// CHECK_SLM_M64: #define __PCLMUL__ 1 ++// CHECK_SLM_M64: #define __POPCNT__ 1 ++// CHECK_SLM_M64: #define __PRFCHW__ 1 ++// CHECK_SLM_M64: #define __RDRND__ 1 ++// CHECK_SLM_M64: #define __SSE2_MATH__ 1 ++// CHECK_SLM_M64: #define __SSE2__ 1 ++// CHECK_SLM_M64: #define __SSE3__ 1 ++// CHECK_SLM_M64: #define __SSE4_1__ 1 ++// CHECK_SLM_M64: #define __SSE4_2__ 1 ++// CHECK_SLM_M64: #define __SSE_MATH__ 1 ++// CHECK_SLM_M64: #define __SSE__ 1 ++// CHECK_SLM_M64: #define __SSSE3__ 1 ++// CHECK_SLM_M64: #define __amd64 1 ++// CHECK_SLM_M64: #define __amd64__ 1 ++// CHECK_SLM_M64: #define __slm 1 ++// CHECK_SLM_M64: #define __slm__ 1 ++// CHECK_SLM_M64: #define __tune_slm__ 1 ++// CHECK_SLM_M64: #define __x86_64 1 ++// CHECK_SLM_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=lakemont -m32 -E -dM %s -o - 2>&1 \ ++// 
RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck %s -check-prefix=CHECK_LAKEMONT_M32 ++// CHECK_LAKEMONT_M32: #define __i386 1 ++// CHECK_LAKEMONT_M32: #define __i386__ 1 ++// CHECK_LAKEMONT_M32: #define __i586 1 ++// CHECK_LAKEMONT_M32: #define __i586__ 1 ++// CHECK_LAKEMONT_M32: #define __pentium 1 ++// CHECK_LAKEMONT_M32: #define __pentium__ 1 ++// CHECK_LAKEMONT_M32: #define __tune_lakemont__ 1 ++// CHECK_LAKEMONT_M32: #define i386 1 ++// RUN: not %clang -march=lakemont -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck %s -check-prefix=CHECK_LAKEMONT_M64 ++// CHECK_LAKEMONT_M64: error: ++ ++// RUN: %clang -march=geode -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_GEODE_M32 ++// CHECK_GEODE_M32: #define __3dNOW_A__ 1 ++// CHECK_GEODE_M32: #define __3dNOW__ 1 ++// CHECK_GEODE_M32: #define __MMX__ 1 ++// CHECK_GEODE_M32: #define __geode 1 ++// CHECK_GEODE_M32: #define __geode__ 1 ++// CHECK_GEODE_M32: #define __i386 1 ++// CHECK_GEODE_M32: #define __i386__ 1 ++// CHECK_GEODE_M32: #define __tune_geode__ 1 ++// CHECK_GEODE_M32: #define i386 1 ++// RUN: not %clang -march=geode -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_GEODE_M64 ++// CHECK_GEODE_M64: error: {{.*}} ++ ++// RUN: %clang -march=k6 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_M32 ++// CHECK_K6_M32: #define __MMX__ 1 ++// CHECK_K6_M32: #define __i386 1 ++// CHECK_K6_M32: #define __i386__ 1 ++// CHECK_K6_M32: #define __k6 1 ++// CHECK_K6_M32: #define __k6__ 1 ++// CHECK_K6_M32: #define __tune_k6__ 1 ++// CHECK_K6_M32: #define i386 1 ++// RUN: not %clang -march=k6 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_M64 ++// CHECK_K6_M64: error: {{.*}} ++ ++// RUN: %clang -march=k6-2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_2_M32 ++// CHECK_K6_2_M32: #define __3dNOW__ 1 ++// CHECK_K6_2_M32: #define __MMX__ 1 ++// CHECK_K6_2_M32: #define __i386 1 ++// CHECK_K6_2_M32: #define __i386__ 1 ++// CHECK_K6_2_M32: #define __k6 1 ++// CHECK_K6_2_M32: #define __k6_2__ 1 ++// CHECK_K6_2_M32: #define __k6__ 1 ++// CHECK_K6_2_M32: #define __tune_k6_2__ 1 ++// CHECK_K6_2_M32: #define __tune_k6__ 1 ++// CHECK_K6_2_M32: #define i386 1 ++// RUN: not %clang -march=k6-2 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_2_M64 ++// CHECK_K6_2_M64: error: {{.*}} ++ ++// RUN: %clang -march=k6-3 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_3_M32 ++// CHECK_K6_3_M32: #define __3dNOW__ 1 ++// CHECK_K6_3_M32: #define __MMX__ 1 ++// CHECK_K6_3_M32: #define __i386 1 ++// CHECK_K6_3_M32: #define __i386__ 1 ++// CHECK_K6_3_M32: #define __k6 1 ++// CHECK_K6_3_M32: #define __k6_3__ 1 ++// CHECK_K6_3_M32: #define __k6__ 1 ++// CHECK_K6_3_M32: #define __tune_k6_3__ 1 ++// CHECK_K6_3_M32: #define __tune_k6__ 1 ++// CHECK_K6_3_M32: #define i386 1 ++// RUN: not %clang -march=k6-3 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_3_M64 ++// CHECK_K6_3_M64: error: 
{{.*}} ++ ++// RUN: %clang -march=athlon -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_M32 ++// CHECK_ATHLON_M32: #define __3dNOW_A__ 1 ++// CHECK_ATHLON_M32: #define __3dNOW__ 1 ++// CHECK_ATHLON_M32: #define __MMX__ 1 ++// CHECK_ATHLON_M32: #define __athlon 1 ++// CHECK_ATHLON_M32: #define __athlon__ 1 ++// CHECK_ATHLON_M32: #define __i386 1 ++// CHECK_ATHLON_M32: #define __i386__ 1 ++// CHECK_ATHLON_M32: #define __tune_athlon__ 1 ++// CHECK_ATHLON_M32: #define i386 1 ++// RUN: not %clang -march=athlon -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_M64 ++// CHECK_ATHLON_M64: error: {{.*}} ++ ++// RUN: %clang -march=athlon-tbird -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_TBIRD_M32 ++// CHECK_ATHLON_TBIRD_M32: #define __3dNOW_A__ 1 ++// CHECK_ATHLON_TBIRD_M32: #define __3dNOW__ 1 ++// CHECK_ATHLON_TBIRD_M32: #define __MMX__ 1 ++// CHECK_ATHLON_TBIRD_M32: #define __athlon 1 ++// CHECK_ATHLON_TBIRD_M32: #define __athlon__ 1 ++// CHECK_ATHLON_TBIRD_M32: #define __i386 1 ++// CHECK_ATHLON_TBIRD_M32: #define __i386__ 1 ++// CHECK_ATHLON_TBIRD_M32: #define __tune_athlon__ 1 ++// CHECK_ATHLON_TBIRD_M32: #define i386 1 ++// RUN: not %clang -march=athlon-tbird -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_TBIRD_M64 ++// CHECK_ATHLON_TBIRD_M64: error: {{.*}} ++ ++// RUN: %clang -march=athlon-4 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_4_M32 ++// CHECK_ATHLON_4_M32: #define __3dNOW_A__ 1 ++// CHECK_ATHLON_4_M32: #define __3dNOW__ 1 ++// CHECK_ATHLON_4_M32: #define __MMX__ 1 ++// CHECK_ATHLON_4_M32: #define __SSE__ 1 ++// CHECK_ATHLON_4_M32: #define __athlon 1 ++// CHECK_ATHLON_4_M32: #define __athlon__ 1 ++// CHECK_ATHLON_4_M32: #define __athlon_sse__ 1 ++// CHECK_ATHLON_4_M32: #define __i386 1 ++// CHECK_ATHLON_4_M32: #define __i386__ 1 ++// CHECK_ATHLON_4_M32: #define __tune_athlon__ 1 ++// CHECK_ATHLON_4_M32: #define __tune_athlon_sse__ 1 ++// CHECK_ATHLON_4_M32: #define i386 1 ++// RUN: not %clang -march=athlon-4 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_4_M64 ++// CHECK_ATHLON_4_M64: error: {{.*}} ++ ++// RUN: %clang -march=athlon-xp -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_XP_M32 ++// CHECK_ATHLON_XP_M32: #define __3dNOW_A__ 1 ++// CHECK_ATHLON_XP_M32: #define __3dNOW__ 1 ++// CHECK_ATHLON_XP_M32: #define __MMX__ 1 ++// CHECK_ATHLON_XP_M32: #define __SSE__ 1 ++// CHECK_ATHLON_XP_M32: #define __athlon 1 ++// CHECK_ATHLON_XP_M32: #define __athlon__ 1 ++// CHECK_ATHLON_XP_M32: #define __athlon_sse__ 1 ++// CHECK_ATHLON_XP_M32: #define __i386 1 ++// CHECK_ATHLON_XP_M32: #define __i386__ 1 ++// CHECK_ATHLON_XP_M32: #define __tune_athlon__ 1 ++// CHECK_ATHLON_XP_M32: #define __tune_athlon_sse__ 1 ++// CHECK_ATHLON_XP_M32: #define i386 1 ++// RUN: not %clang -march=athlon-xp -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_XP_M64 ++// CHECK_ATHLON_XP_M64: error: {{.*}} ++ ++// RUN: 
%clang -march=athlon-mp -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_MP_M32 ++// CHECK_ATHLON_MP_M32: #define __3dNOW_A__ 1 ++// CHECK_ATHLON_MP_M32: #define __3dNOW__ 1 ++// CHECK_ATHLON_MP_M32: #define __MMX__ 1 ++// CHECK_ATHLON_MP_M32: #define __SSE__ 1 ++// CHECK_ATHLON_MP_M32: #define __athlon 1 ++// CHECK_ATHLON_MP_M32: #define __athlon__ 1 ++// CHECK_ATHLON_MP_M32: #define __athlon_sse__ 1 ++// CHECK_ATHLON_MP_M32: #define __i386 1 ++// CHECK_ATHLON_MP_M32: #define __i386__ 1 ++// CHECK_ATHLON_MP_M32: #define __tune_athlon__ 1 ++// CHECK_ATHLON_MP_M32: #define __tune_athlon_sse__ 1 ++// CHECK_ATHLON_MP_M32: #define i386 1 ++// RUN: not %clang -march=athlon-mp -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_MP_M64 ++// CHECK_ATHLON_MP_M64: error: {{.*}} ++ ++// RUN: %clang -march=x86-64 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_X86_64_M32 ++// CHECK_X86_64_M32: #define __MMX__ 1 ++// CHECK_X86_64_M32: #define __SSE2__ 1 ++// CHECK_X86_64_M32: #define __SSE__ 1 ++// CHECK_X86_64_M32: #define __i386 1 ++// CHECK_X86_64_M32: #define __i386__ 1 ++// CHECK_X86_64_M32: #define __k8 1 ++// CHECK_X86_64_M32: #define __k8__ 1 ++// CHECK_X86_64_M32: #define i386 1 ++ ++// RUN: %clang -march=k8 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M32 ++// CHECK_K8_M32: #define __3dNOW_A__ 1 ++// CHECK_K8_M32: #define __3dNOW__ 1 ++// CHECK_K8_M32: #define __MMX__ 1 ++// CHECK_K8_M32: #define __SSE2__ 1 ++// CHECK_K8_M32: #define __SSE__ 1 ++// CHECK_K8_M32: #define __i386 1 ++// CHECK_K8_M32: #define __i386__ 1 ++// CHECK_K8_M32: #define __k8 1 ++// CHECK_K8_M32: #define __k8__ 1 ++// CHECK_K8_M32: #define __tune_k8__ 1 ++// CHECK_K8_M32: #define i386 1 ++ ++// RUN: %clang -march=k8 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M64 ++// CHECK_K8_M64: #define __3dNOW_A__ 1 ++// CHECK_K8_M64: #define __3dNOW__ 1 ++// CHECK_K8_M64: #define __MMX__ 1 ++// CHECK_K8_M64: #define __SSE2_MATH__ 1 ++// CHECK_K8_M64: #define __SSE2__ 1 ++// CHECK_K8_M64: #define __SSE_MATH__ 1 ++// CHECK_K8_M64: #define __SSE__ 1 ++// CHECK_K8_M64: #define __amd64 1 ++// CHECK_K8_M64: #define __amd64__ 1 ++// CHECK_K8_M64: #define __k8 1 ++// CHECK_K8_M64: #define __k8__ 1 ++// CHECK_K8_M64: #define __tune_k8__ 1 ++// CHECK_K8_M64: #define __x86_64 1 ++// CHECK_K8_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=k8-sse3 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_SSE3_M32 ++// CHECK_K8_SSE3_M32: #define __3dNOW_A__ 1 ++// CHECK_K8_SSE3_M32: #define __3dNOW__ 1 ++// CHECK_K8_SSE3_M32: #define __MMX__ 1 ++// CHECK_K8_SSE3_M32: #define __SSE2__ 1 ++// CHECK_K8_SSE3_M32: #define __SSE3__ 1 ++// CHECK_K8_SSE3_M32: #define __SSE__ 1 ++// CHECK_K8_SSE3_M32: #define __i386 1 ++// CHECK_K8_SSE3_M32: #define __i386__ 1 ++// CHECK_K8_SSE3_M32: #define __k8 1 ++// CHECK_K8_SSE3_M32: #define __k8__ 1 ++// CHECK_K8_SSE3_M32: #define __tune_k8__ 1 ++// CHECK_K8_SSE3_M32: #define i386 1 ++ ++// RUN: %clang -march=k8-sse3 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck 
-match-full-lines %s -check-prefix=CHECK_K8_SSE3_M64 ++// CHECK_K8_SSE3_M64: #define __3dNOW_A__ 1 ++// CHECK_K8_SSE3_M64: #define __3dNOW__ 1 ++// CHECK_K8_SSE3_M64: #define __MMX__ 1 ++// CHECK_K8_SSE3_M64: #define __SSE2_MATH__ 1 ++// CHECK_K8_SSE3_M64: #define __SSE2__ 1 ++// CHECK_K8_SSE3_M64: #define __SSE3__ 1 ++// CHECK_K8_SSE3_M64: #define __SSE_MATH__ 1 ++// CHECK_K8_SSE3_M64: #define __SSE__ 1 ++// CHECK_K8_SSE3_M64: #define __amd64 1 ++// CHECK_K8_SSE3_M64: #define __amd64__ 1 ++// CHECK_K8_SSE3_M64: #define __k8 1 ++// CHECK_K8_SSE3_M64: #define __k8__ 1 ++// CHECK_K8_SSE3_M64: #define __tune_k8__ 1 ++// CHECK_K8_SSE3_M64: #define __x86_64 1 ++// CHECK_K8_SSE3_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=opteron -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_M32 ++// CHECK_OPTERON_M32: #define __3dNOW_A__ 1 ++// CHECK_OPTERON_M32: #define __3dNOW__ 1 ++// CHECK_OPTERON_M32: #define __MMX__ 1 ++// CHECK_OPTERON_M32: #define __SSE2__ 1 ++// CHECK_OPTERON_M32: #define __SSE__ 1 ++// CHECK_OPTERON_M32: #define __i386 1 ++// CHECK_OPTERON_M32: #define __i386__ 1 ++// CHECK_OPTERON_M32: #define __k8 1 ++// CHECK_OPTERON_M32: #define __k8__ 1 ++// CHECK_OPTERON_M32: #define __tune_k8__ 1 ++// CHECK_OPTERON_M32: #define i386 1 ++ ++// RUN: %clang -march=opteron -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_M64 ++// CHECK_OPTERON_M64: #define __3dNOW_A__ 1 ++// CHECK_OPTERON_M64: #define __3dNOW__ 1 ++// CHECK_OPTERON_M64: #define __MMX__ 1 ++// CHECK_OPTERON_M64: #define __SSE2_MATH__ 1 ++// CHECK_OPTERON_M64: #define __SSE2__ 1 ++// CHECK_OPTERON_M64: #define __SSE_MATH__ 1 ++// CHECK_OPTERON_M64: #define __SSE__ 1 ++// CHECK_OPTERON_M64: #define __amd64 1 ++// CHECK_OPTERON_M64: #define __amd64__ 1 ++// CHECK_OPTERON_M64: #define __k8 1 ++// CHECK_OPTERON_M64: #define __k8__ 1 ++// CHECK_OPTERON_M64: #define __tune_k8__ 1 ++// CHECK_OPTERON_M64: #define __x86_64 1 ++// CHECK_OPTERON_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=opteron-sse3 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_SSE3_M32 ++// CHECK_OPTERON_SSE3_M32: #define __3dNOW_A__ 1 ++// CHECK_OPTERON_SSE3_M32: #define __3dNOW__ 1 ++// CHECK_OPTERON_SSE3_M32: #define __MMX__ 1 ++// CHECK_OPTERON_SSE3_M32: #define __SSE2__ 1 ++// CHECK_OPTERON_SSE3_M32: #define __SSE3__ 1 ++// CHECK_OPTERON_SSE3_M32: #define __SSE__ 1 ++// CHECK_OPTERON_SSE3_M32: #define __i386 1 ++// CHECK_OPTERON_SSE3_M32: #define __i386__ 1 ++// CHECK_OPTERON_SSE3_M32: #define __k8 1 ++// CHECK_OPTERON_SSE3_M32: #define __k8__ 1 ++// CHECK_OPTERON_SSE3_M32: #define __tune_k8__ 1 ++// CHECK_OPTERON_SSE3_M32: #define i386 1 ++ ++// RUN: %clang -march=opteron-sse3 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_SSE3_M64 ++// CHECK_OPTERON_SSE3_M64: #define __3dNOW_A__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __3dNOW__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __MMX__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __SSE2_MATH__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __SSE2__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __SSE3__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __SSE_MATH__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __SSE__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __amd64 1 ++// CHECK_OPTERON_SSE3_M64: 
#define __amd64__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __k8 1 ++// CHECK_OPTERON_SSE3_M64: #define __k8__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __tune_k8__ 1 ++// CHECK_OPTERON_SSE3_M64: #define __x86_64 1 ++// CHECK_OPTERON_SSE3_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=athlon64 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_M32 ++// CHECK_ATHLON64_M32: #define __3dNOW_A__ 1 ++// CHECK_ATHLON64_M32: #define __3dNOW__ 1 ++// CHECK_ATHLON64_M32: #define __MMX__ 1 ++// CHECK_ATHLON64_M32: #define __SSE2__ 1 ++// CHECK_ATHLON64_M32: #define __SSE__ 1 ++// CHECK_ATHLON64_M32: #define __i386 1 ++// CHECK_ATHLON64_M32: #define __i386__ 1 ++// CHECK_ATHLON64_M32: #define __k8 1 ++// CHECK_ATHLON64_M32: #define __k8__ 1 ++// CHECK_ATHLON64_M32: #define __tune_k8__ 1 ++// CHECK_ATHLON64_M32: #define i386 1 ++ ++// RUN: %clang -march=athlon64 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_M64 ++// CHECK_ATHLON64_M64: #define __3dNOW_A__ 1 ++// CHECK_ATHLON64_M64: #define __3dNOW__ 1 ++// CHECK_ATHLON64_M64: #define __MMX__ 1 ++// CHECK_ATHLON64_M64: #define __SSE2_MATH__ 1 ++// CHECK_ATHLON64_M64: #define __SSE2__ 1 ++// CHECK_ATHLON64_M64: #define __SSE_MATH__ 1 ++// CHECK_ATHLON64_M64: #define __SSE__ 1 ++// CHECK_ATHLON64_M64: #define __amd64 1 ++// CHECK_ATHLON64_M64: #define __amd64__ 1 ++// CHECK_ATHLON64_M64: #define __k8 1 ++// CHECK_ATHLON64_M64: #define __k8__ 1 ++// CHECK_ATHLON64_M64: #define __tune_k8__ 1 ++// CHECK_ATHLON64_M64: #define __x86_64 1 ++// CHECK_ATHLON64_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=athlon64-sse3 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_SSE3_M32 ++// CHECK_ATHLON64_SSE3_M32: #define __3dNOW_A__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define __3dNOW__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define __MMX__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define __SSE2__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define __SSE3__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define __SSE__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define __i386 1 ++// CHECK_ATHLON64_SSE3_M32: #define __i386__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define __k8 1 ++// CHECK_ATHLON64_SSE3_M32: #define __k8__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define __tune_k8__ 1 ++// CHECK_ATHLON64_SSE3_M32: #define i386 1 ++ ++// RUN: %clang -march=athlon64-sse3 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_SSE3_M64 ++// CHECK_ATHLON64_SSE3_M64: #define __3dNOW_A__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __3dNOW__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __MMX__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __SSE2_MATH__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __SSE2__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __SSE3__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __SSE_MATH__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __SSE__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __amd64 1 ++// CHECK_ATHLON64_SSE3_M64: #define __amd64__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __k8 1 ++// CHECK_ATHLON64_SSE3_M64: #define __k8__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __tune_k8__ 1 ++// CHECK_ATHLON64_SSE3_M64: #define __x86_64 1 ++// CHECK_ATHLON64_SSE3_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=athlon-fx -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck 
-match-full-lines %s -check-prefix=CHECK_ATHLON_FX_M32 ++// CHECK_ATHLON_FX_M32: #define __3dNOW_A__ 1 ++// CHECK_ATHLON_FX_M32: #define __3dNOW__ 1 ++// CHECK_ATHLON_FX_M32: #define __MMX__ 1 ++// CHECK_ATHLON_FX_M32: #define __SSE2__ 1 ++// CHECK_ATHLON_FX_M32: #define __SSE__ 1 ++// CHECK_ATHLON_FX_M32: #define __i386 1 ++// CHECK_ATHLON_FX_M32: #define __i386__ 1 ++// CHECK_ATHLON_FX_M32: #define __k8 1 ++// CHECK_ATHLON_FX_M32: #define __k8__ 1 ++// CHECK_ATHLON_FX_M32: #define __tune_k8__ 1 ++// CHECK_ATHLON_FX_M32: #define i386 1 ++ ++// RUN: %clang -march=athlon-fx -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_FX_M64 ++// CHECK_ATHLON_FX_M64: #define __3dNOW_A__ 1 ++// CHECK_ATHLON_FX_M64: #define __3dNOW__ 1 ++// CHECK_ATHLON_FX_M64: #define __MMX__ 1 ++// CHECK_ATHLON_FX_M64: #define __SSE2_MATH__ 1 ++// CHECK_ATHLON_FX_M64: #define __SSE2__ 1 ++// CHECK_ATHLON_FX_M64: #define __SSE_MATH__ 1 ++// CHECK_ATHLON_FX_M64: #define __SSE__ 1 ++// CHECK_ATHLON_FX_M64: #define __amd64 1 ++// CHECK_ATHLON_FX_M64: #define __amd64__ 1 ++// CHECK_ATHLON_FX_M64: #define __k8 1 ++// CHECK_ATHLON_FX_M64: #define __k8__ 1 ++// CHECK_ATHLON_FX_M64: #define __tune_k8__ 1 ++// CHECK_ATHLON_FX_M64: #define __x86_64 1 ++// CHECK_ATHLON_FX_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=amdfam10 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_AMDFAM10_M32 ++// CHECK_AMDFAM10_M32: #define __3dNOW_A__ 1 ++// CHECK_AMDFAM10_M32: #define __3dNOW__ 1 ++// CHECK_AMDFAM10_M32: #define __LAHF_SAHF__ 1 ++// CHECK_AMDFAM10_M32: #define __LZCNT__ 1 ++// CHECK_AMDFAM10_M32: #define __MMX__ 1 ++// CHECK_AMDFAM10_M32: #define __POPCNT__ 1 ++// CHECK_AMDFAM10_M32: #define __PRFCHW__ 1 ++// CHECK_AMDFAM10_M32: #define __SSE2_MATH__ 1 ++// CHECK_AMDFAM10_M32: #define __SSE2__ 1 ++// CHECK_AMDFAM10_M32: #define __SSE3__ 1 ++// CHECK_AMDFAM10_M32: #define __SSE4A__ 1 ++// CHECK_AMDFAM10_M32: #define __SSE_MATH__ 1 ++// CHECK_AMDFAM10_M32: #define __SSE__ 1 ++// CHECK_AMDFAM10_M32-NOT: #define __SSSE3__ 1 ++// CHECK_AMDFAM10_M32: #define __amdfam10 1 ++// CHECK_AMDFAM10_M32: #define __amdfam10__ 1 ++// CHECK_AMDFAM10_M32: #define __i386 1 ++// CHECK_AMDFAM10_M32: #define __i386__ 1 ++// CHECK_AMDFAM10_M32: #define __tune_amdfam10__ 1 ++ ++// RUN: %clang -march=amdfam10 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_AMDFAM10_M64 ++// CHECK_AMDFAM10_M64: #define __3dNOW_A__ 1 ++// CHECK_AMDFAM10_M64: #define __3dNOW__ 1 ++// CHECK_AMDFAM10_M64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1 ++// CHECK_AMDFAM10_M64: #define __LAHF_SAHF__ 1 ++// CHECK_AMDFAM10_M64: #define __LZCNT__ 1 ++// CHECK_AMDFAM10_M64: #define __MMX__ 1 ++// CHECK_AMDFAM10_M64: #define __POPCNT__ 1 ++// CHECK_AMDFAM10_M64: #define __PRFCHW__ 1 ++// CHECK_AMDFAM10_M64: #define __SSE2_MATH__ 1 ++// CHECK_AMDFAM10_M64: #define __SSE2__ 1 ++// CHECK_AMDFAM10_M64: #define __SSE3__ 1 ++// CHECK_AMDFAM10_M64: #define __SSE4A__ 1 ++// CHECK_AMDFAM10_M64: #define __SSE_MATH__ 1 ++// CHECK_AMDFAM10_M64: #define __SSE__ 1 ++// CHECK_AMDFAM10_M64-NOT: #define __SSSE3__ 1 ++// CHECK_AMDFAM10_M64: #define __amd64 1 ++// CHECK_AMDFAM10_M64: #define __amd64__ 1 ++// CHECK_AMDFAM10_M64: #define __amdfam10 1 ++// CHECK_AMDFAM10_M64: #define __amdfam10__ 1 ++// CHECK_AMDFAM10_M64: #define __tune_amdfam10__ 1 ++// 
CHECK_AMDFAM10_M64: #define __x86_64 1 ++// CHECK_AMDFAM10_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=btver1 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER1_M32 ++// CHECK_BTVER1_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_BTVER1_M32-NOT: #define __3dNOW__ 1 ++// CHECK_BTVER1_M32: #define __LAHF_SAHF__ 1 ++// CHECK_BTVER1_M32: #define __LZCNT__ 1 ++// CHECK_BTVER1_M32: #define __MMX__ 1 ++// CHECK_BTVER1_M32: #define __POPCNT__ 1 ++// CHECK_BTVER1_M32: #define __PRFCHW__ 1 ++// CHECK_BTVER1_M32: #define __SSE2_MATH__ 1 ++// CHECK_BTVER1_M32: #define __SSE2__ 1 ++// CHECK_BTVER1_M32: #define __SSE3__ 1 ++// CHECK_BTVER1_M32: #define __SSE4A__ 1 ++// CHECK_BTVER1_M32: #define __SSE_MATH__ 1 ++// CHECK_BTVER1_M32: #define __SSE__ 1 ++// CHECK_BTVER1_M32: #define __SSSE3__ 1 ++// CHECK_BTVER1_M32: #define __btver1 1 ++// CHECK_BTVER1_M32: #define __btver1__ 1 ++// CHECK_BTVER1_M32: #define __i386 1 ++// CHECK_BTVER1_M32: #define __i386__ 1 ++// CHECK_BTVER1_M32: #define __tune_btver1__ 1 ++ ++// RUN: %clang -march=btver1 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER1_M64 ++// CHECK_BTVER1_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_BTVER1_M64-NOT: #define __3dNOW__ 1 ++// CHECK_BTVER1_M64: #define __LAHF_SAHF__ 1 ++// CHECK_BTVER1_M64: #define __LZCNT__ 1 ++// CHECK_BTVER1_M64: #define __MMX__ 1 ++// CHECK_BTVER1_M64: #define __POPCNT__ 1 ++// CHECK_BTVER1_M64: #define __PRFCHW__ 1 ++// CHECK_BTVER1_M64: #define __SSE2_MATH__ 1 ++// CHECK_BTVER1_M64: #define __SSE2__ 1 ++// CHECK_BTVER1_M64: #define __SSE3__ 1 ++// CHECK_BTVER1_M64: #define __SSE4A__ 1 ++// CHECK_BTVER1_M64: #define __SSE_MATH__ 1 ++// CHECK_BTVER1_M64: #define __SSE__ 1 ++// CHECK_BTVER1_M64: #define __SSSE3__ 1 ++// CHECK_BTVER1_M64: #define __amd64 1 ++// CHECK_BTVER1_M64: #define __amd64__ 1 ++// CHECK_BTVER1_M64: #define __btver1 1 ++// CHECK_BTVER1_M64: #define __btver1__ 1 ++// CHECK_BTVER1_M64: #define __tune_btver1__ 1 ++// CHECK_BTVER1_M64: #define __x86_64 1 ++// CHECK_BTVER1_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=btver2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER2_M32 ++// CHECK_BTVER2_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_BTVER2_M32-NOT: #define __3dNOW__ 1 ++// CHECK_BTVER2_M32: #define __AES__ 1 ++// CHECK_BTVER2_M32: #define __AVX__ 1 ++// CHECK_BTVER2_M32: #define __BMI__ 1 ++// CHECK_BTVER2_M32: #define __F16C__ 1 ++// CHECK_BTVER2_M32: #define __LZCNT__ 1 ++// CHECK_BTVER2_M32: #define __MMX__ 1 ++// CHECK_BTVER2_M32: #define __MOVBE__ 1 ++// CHECK_BTVER2_M32: #define __PCLMUL__ 1 ++// CHECK_BTVER2_M32: #define __POPCNT__ 1 ++// CHECK_BTVER2_M32: #define __PRFCHW__ 1 ++// CHECK_BTVER2_M32: #define __SSE2_MATH__ 1 ++// CHECK_BTVER2_M32: #define __SSE2__ 1 ++// CHECK_BTVER2_M32: #define __SSE3__ 1 ++// CHECK_BTVER2_M32: #define __SSE4A__ 1 ++// CHECK_BTVER2_M32: #define __SSE_MATH__ 1 ++// CHECK_BTVER2_M32: #define __SSE__ 1 ++// CHECK_BTVER2_M32: #define __SSSE3__ 1 ++// CHECK_BTVER2_M32: #define __XSAVEOPT__ 1 ++// CHECK_BTVER2_M32: #define __XSAVE__ 1 ++// CHECK_BTVER2_M32: #define __btver2 1 ++// CHECK_BTVER2_M32: #define __btver2__ 1 ++// CHECK_BTVER2_M32: #define __i386 1 ++// CHECK_BTVER2_M32: #define __i386__ 1 ++// CHECK_BTVER2_M32: #define __tune_btver2__ 1 ++ ++// RUN: %clang -march=btver2 -m64 -E -dM %s -o - 
2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER2_M64 ++// CHECK_BTVER2_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_BTVER2_M64-NOT: #define __3dNOW__ 1 ++// CHECK_BTVER2_M64: #define __AES__ 1 ++// CHECK_BTVER2_M64: #define __AVX__ 1 ++// CHECK_BTVER2_M64: #define __BMI__ 1 ++// CHECK_BTVER2_M64: #define __F16C__ 1 ++// CHECK_BTVER2_M64: #define __LZCNT__ 1 ++// CHECK_BTVER2_M64: #define __MMX__ 1 ++// CHECK_BTVER2_M64: #define __MOVBE__ 1 ++// CHECK_BTVER2_M64: #define __PCLMUL__ 1 ++// CHECK_BTVER2_M64: #define __POPCNT__ 1 ++// CHECK_BTVER2_M64: #define __PRFCHW__ 1 ++// CHECK_BTVER2_M64: #define __SSE2_MATH__ 1 ++// CHECK_BTVER2_M64: #define __SSE2__ 1 ++// CHECK_BTVER2_M64: #define __SSE3__ 1 ++// CHECK_BTVER2_M64: #define __SSE4A__ 1 ++// CHECK_BTVER2_M64: #define __SSE_MATH__ 1 ++// CHECK_BTVER2_M64: #define __SSE__ 1 ++// CHECK_BTVER2_M64: #define __SSSE3__ 1 ++// CHECK_BTVER2_M64: #define __XSAVEOPT__ 1 ++// CHECK_BTVER2_M64: #define __XSAVE__ 1 ++// CHECK_BTVER2_M64: #define __amd64 1 ++// CHECK_BTVER2_M64: #define __amd64__ 1 ++// CHECK_BTVER2_M64: #define __btver2 1 ++// CHECK_BTVER2_M64: #define __btver2__ 1 ++// CHECK_BTVER2_M64: #define __tune_btver2__ 1 ++// CHECK_BTVER2_M64: #define __x86_64 1 ++// CHECK_BTVER2_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=bdver1 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER1_M32 ++// CHECK_BDVER1_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_BDVER1_M32-NOT: #define __3dNOW__ 1 ++// CHECK_BDVER1_M32: #define __AES__ 1 ++// CHECK_BDVER1_M32: #define __AVX__ 1 ++// CHECK_BDVER1_M32: #define __FMA4__ 1 ++// CHECK_BDVER1_M32: #define __LWP__ 1 ++// CHECK_BDVER1_M32: #define __LZCNT__ 1 ++// CHECK_BDVER1_M32: #define __MMX__ 1 ++// CHECK_BDVER1_M32: #define __PCLMUL__ 1 ++// CHECK_BDVER1_M32: #define __POPCNT__ 1 ++// CHECK_BDVER1_M32: #define __PRFCHW__ 1 ++// CHECK_BDVER1_M32: #define __SSE2_MATH__ 1 ++// CHECK_BDVER1_M32: #define __SSE2__ 1 ++// CHECK_BDVER1_M32: #define __SSE3__ 1 ++// CHECK_BDVER1_M32: #define __SSE4A__ 1 ++// CHECK_BDVER1_M32: #define __SSE4_1__ 1 ++// CHECK_BDVER1_M32: #define __SSE4_2__ 1 ++// CHECK_BDVER1_M32: #define __SSE_MATH__ 1 ++// CHECK_BDVER1_M32: #define __SSE__ 1 ++// CHECK_BDVER1_M32: #define __SSSE3__ 1 ++// CHECK_BDVER1_M32: #define __XOP__ 1 ++// CHECK_BDVER1_M32: #define __XSAVE__ 1 ++// CHECK_BDVER1_M32: #define __bdver1 1 ++// CHECK_BDVER1_M32: #define __bdver1__ 1 ++// CHECK_BDVER1_M32: #define __i386 1 ++// CHECK_BDVER1_M32: #define __i386__ 1 ++// CHECK_BDVER1_M32: #define __tune_bdver1__ 1 ++ ++// RUN: %clang -march=bdver1 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER1_M64 ++// CHECK_BDVER1_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_BDVER1_M64-NOT: #define __3dNOW__ 1 ++// CHECK_BDVER1_M64: #define __AES__ 1 ++// CHECK_BDVER1_M64: #define __AVX__ 1 ++// CHECK_BDVER1_M64: #define __FMA4__ 1 ++// CHECK_BDVER1_M64: #define __LWP__ 1 ++// CHECK_BDVER1_M64: #define __LZCNT__ 1 ++// CHECK_BDVER1_M64: #define __MMX__ 1 ++// CHECK_BDVER1_M64: #define __PCLMUL__ 1 ++// CHECK_BDVER1_M64: #define __POPCNT__ 1 ++// CHECK_BDVER1_M64: #define __PRFCHW__ 1 ++// CHECK_BDVER1_M64: #define __SSE2_MATH__ 1 ++// CHECK_BDVER1_M64: #define __SSE2__ 1 ++// CHECK_BDVER1_M64: #define __SSE3__ 1 ++// CHECK_BDVER1_M64: #define __SSE4A__ 1 ++// CHECK_BDVER1_M64: #define __SSE4_1__ 1 
++// CHECK_BDVER1_M64: #define __SSE4_2__ 1 ++// CHECK_BDVER1_M64: #define __SSE_MATH__ 1 ++// CHECK_BDVER1_M64: #define __SSE__ 1 ++// CHECK_BDVER1_M64: #define __SSSE3__ 1 ++// CHECK_BDVER1_M64: #define __XOP__ 1 ++// CHECK_BDVER1_M64: #define __XSAVE__ 1 ++// CHECK_BDVER1_M64: #define __amd64 1 ++// CHECK_BDVER1_M64: #define __amd64__ 1 ++// CHECK_BDVER1_M64: #define __bdver1 1 ++// CHECK_BDVER1_M64: #define __bdver1__ 1 ++// CHECK_BDVER1_M64: #define __tune_bdver1__ 1 ++// CHECK_BDVER1_M64: #define __x86_64 1 ++// CHECK_BDVER1_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=bdver2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER2_M32 ++// CHECK_BDVER2_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_BDVER2_M32-NOT: #define __3dNOW__ 1 ++// CHECK_BDVER2_M32: #define __AES__ 1 ++// CHECK_BDVER2_M32: #define __AVX__ 1 ++// CHECK_BDVER2_M32: #define __BMI__ 1 ++// CHECK_BDVER2_M32: #define __F16C__ 1 ++// CHECK_BDVER2_M32: #define __FMA4__ 1 ++// CHECK_BDVER2_M32: #define __FMA__ 1 ++// CHECK_BDVER2_M32: #define __LWP__ 1 ++// CHECK_BDVER2_M32: #define __LZCNT__ 1 ++// CHECK_BDVER2_M32: #define __MMX__ 1 ++// CHECK_BDVER2_M32: #define __PCLMUL__ 1 ++// CHECK_BDVER2_M32: #define __POPCNT__ 1 ++// CHECK_BDVER2_M32: #define __PRFCHW__ 1 ++// CHECK_BDVER2_M32: #define __SSE2_MATH__ 1 ++// CHECK_BDVER2_M32: #define __SSE2__ 1 ++// CHECK_BDVER2_M32: #define __SSE3__ 1 ++// CHECK_BDVER2_M32: #define __SSE4A__ 1 ++// CHECK_BDVER2_M32: #define __SSE4_1__ 1 ++// CHECK_BDVER2_M32: #define __SSE4_2__ 1 ++// CHECK_BDVER2_M32: #define __SSE_MATH__ 1 ++// CHECK_BDVER2_M32: #define __SSE__ 1 ++// CHECK_BDVER2_M32: #define __SSSE3__ 1 ++// CHECK_BDVER2_M32: #define __TBM__ 1 ++// CHECK_BDVER2_M32: #define __XOP__ 1 ++// CHECK_BDVER2_M32: #define __XSAVE__ 1 ++// CHECK_BDVER2_M32: #define __bdver2 1 ++// CHECK_BDVER2_M32: #define __bdver2__ 1 ++// CHECK_BDVER2_M32: #define __i386 1 ++// CHECK_BDVER2_M32: #define __i386__ 1 ++// CHECK_BDVER2_M32: #define __tune_bdver2__ 1 ++ ++// RUN: %clang -march=bdver2 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER2_M64 ++// CHECK_BDVER2_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_BDVER2_M64-NOT: #define __3dNOW__ 1 ++// CHECK_BDVER2_M64: #define __AES__ 1 ++// CHECK_BDVER2_M64: #define __AVX__ 1 ++// CHECK_BDVER2_M64: #define __BMI__ 1 ++// CHECK_BDVER2_M64: #define __F16C__ 1 ++// CHECK_BDVER2_M64: #define __FMA4__ 1 ++// CHECK_BDVER2_M64: #define __FMA__ 1 ++// CHECK_BDVER2_M64: #define __LWP__ 1 ++// CHECK_BDVER2_M64: #define __LZCNT__ 1 ++// CHECK_BDVER2_M64: #define __MMX__ 1 ++// CHECK_BDVER2_M64: #define __PCLMUL__ 1 ++// CHECK_BDVER2_M64: #define __POPCNT__ 1 ++// CHECK_BDVER2_M64: #define __PRFCHW__ 1 ++// CHECK_BDVER2_M64: #define __SSE2_MATH__ 1 ++// CHECK_BDVER2_M64: #define __SSE2__ 1 ++// CHECK_BDVER2_M64: #define __SSE3__ 1 ++// CHECK_BDVER2_M64: #define __SSE4A__ 1 ++// CHECK_BDVER2_M64: #define __SSE4_1__ 1 ++// CHECK_BDVER2_M64: #define __SSE4_2__ 1 ++// CHECK_BDVER2_M64: #define __SSE_MATH__ 1 ++// CHECK_BDVER2_M64: #define __SSE__ 1 ++// CHECK_BDVER2_M64: #define __SSSE3__ 1 ++// CHECK_BDVER2_M64: #define __TBM__ 1 ++// CHECK_BDVER2_M64: #define __XOP__ 1 ++// CHECK_BDVER2_M64: #define __XSAVE__ 1 ++// CHECK_BDVER2_M64: #define __amd64 1 ++// CHECK_BDVER2_M64: #define __amd64__ 1 ++// CHECK_BDVER2_M64: #define __bdver2 1 ++// CHECK_BDVER2_M64: #define __bdver2__ 1 ++// 
CHECK_BDVER2_M64: #define __tune_bdver2__ 1 ++// CHECK_BDVER2_M64: #define __x86_64 1 ++// CHECK_BDVER2_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=bdver3 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER3_M32 ++// CHECK_BDVER3_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_BDVER3_M32-NOT: #define __3dNOW__ 1 ++// CHECK_BDVER3_M32: #define __AES__ 1 ++// CHECK_BDVER3_M32: #define __AVX__ 1 ++// CHECK_BDVER3_M32: #define __BMI__ 1 ++// CHECK_BDVER3_M32: #define __F16C__ 1 ++// CHECK_BDVER3_M32: #define __FMA4__ 1 ++// CHECK_BDVER3_M32: #define __FMA__ 1 ++// CHECK_BDVER3_M32: #define __FSGSBASE__ 1 ++// CHECK_BDVER3_M32: #define __LWP__ 1 ++// CHECK_BDVER3_M32: #define __LZCNT__ 1 ++// CHECK_BDVER3_M32: #define __MMX__ 1 ++// CHECK_BDVER3_M32: #define __PCLMUL__ 1 ++// CHECK_BDVER3_M32: #define __POPCNT__ 1 ++// CHECK_BDVER3_M32: #define __PRFCHW__ 1 ++// CHECK_BDVER3_M32: #define __SSE2_MATH__ 1 ++// CHECK_BDVER3_M32: #define __SSE2__ 1 ++// CHECK_BDVER3_M32: #define __SSE3__ 1 ++// CHECK_BDVER3_M32: #define __SSE4A__ 1 ++// CHECK_BDVER3_M32: #define __SSE4_1__ 1 ++// CHECK_BDVER3_M32: #define __SSE4_2__ 1 ++// CHECK_BDVER3_M32: #define __SSE_MATH__ 1 ++// CHECK_BDVER3_M32: #define __SSE__ 1 ++// CHECK_BDVER3_M32: #define __SSSE3__ 1 ++// CHECK_BDVER3_M32: #define __TBM__ 1 ++// CHECK_BDVER3_M32: #define __XOP__ 1 ++// CHECK_BDVER3_M32: #define __XSAVEOPT__ 1 ++// CHECK_BDVER3_M32: #define __XSAVE__ 1 ++// CHECK_BDVER3_M32: #define __bdver3 1 ++// CHECK_BDVER3_M32: #define __bdver3__ 1 ++// CHECK_BDVER3_M32: #define __i386 1 ++// CHECK_BDVER3_M32: #define __i386__ 1 ++// CHECK_BDVER3_M32: #define __tune_bdver3__ 1 ++ ++// RUN: %clang -march=bdver3 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER3_M64 ++// CHECK_BDVER3_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_BDVER3_M64-NOT: #define __3dNOW__ 1 ++// CHECK_BDVER3_M64: #define __AES__ 1 ++// CHECK_BDVER3_M64: #define __AVX__ 1 ++// CHECK_BDVER3_M64: #define __BMI__ 1 ++// CHECK_BDVER3_M64: #define __F16C__ 1 ++// CHECK_BDVER3_M64: #define __FMA4__ 1 ++// CHECK_BDVER3_M64: #define __FMA__ 1 ++// CHECK_BDVER3_M64: #define __FSGSBASE__ 1 ++// CHECK_BDVER3_M64: #define __LWP__ 1 ++// CHECK_BDVER3_M64: #define __LZCNT__ 1 ++// CHECK_BDVER3_M64: #define __MMX__ 1 ++// CHECK_BDVER3_M64: #define __PCLMUL__ 1 ++// CHECK_BDVER3_M64: #define __POPCNT__ 1 ++// CHECK_BDVER3_M64: #define __PRFCHW__ 1 ++// CHECK_BDVER3_M64: #define __SSE2_MATH__ 1 ++// CHECK_BDVER3_M64: #define __SSE2__ 1 ++// CHECK_BDVER3_M64: #define __SSE3__ 1 ++// CHECK_BDVER3_M64: #define __SSE4A__ 1 ++// CHECK_BDVER3_M64: #define __SSE4_1__ 1 ++// CHECK_BDVER3_M64: #define __SSE4_2__ 1 ++// CHECK_BDVER3_M64: #define __SSE_MATH__ 1 ++// CHECK_BDVER3_M64: #define __SSE__ 1 ++// CHECK_BDVER3_M64: #define __SSSE3__ 1 ++// CHECK_BDVER3_M64: #define __TBM__ 1 ++// CHECK_BDVER3_M64: #define __XOP__ 1 ++// CHECK_BDVER3_M64: #define __XSAVEOPT__ 1 ++// CHECK_BDVER3_M64: #define __XSAVE__ 1 ++// CHECK_BDVER3_M64: #define __amd64 1 ++// CHECK_BDVER3_M64: #define __amd64__ 1 ++// CHECK_BDVER3_M64: #define __bdver3 1 ++// CHECK_BDVER3_M64: #define __bdver3__ 1 ++// CHECK_BDVER3_M64: #define __tune_bdver3__ 1 ++// CHECK_BDVER3_M64: #define __x86_64 1 ++// CHECK_BDVER3_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=bdver4 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck 
-match-full-lines %s -check-prefix=CHECK_BDVER4_M32 ++// CHECK_BDVER4_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_BDVER4_M32-NOT: #define __3dNOW__ 1 ++// CHECK_BDVER4_M32: #define __AES__ 1 ++// CHECK_BDVER4_M32: #define __AVX2__ 1 ++// CHECK_BDVER4_M32: #define __AVX__ 1 ++// CHECK_BDVER4_M32: #define __BMI2__ 1 ++// CHECK_BDVER4_M32: #define __BMI__ 1 ++// CHECK_BDVER4_M32: #define __F16C__ 1 ++// CHECK_BDVER4_M32: #define __FMA4__ 1 ++// CHECK_BDVER4_M32: #define __FMA__ 1 ++// CHECK_BDVER4_M32: #define __FSGSBASE__ 1 ++// CHECK_BDVER4_M32: #define __LWP__ 1 ++// CHECK_BDVER4_M32: #define __LZCNT__ 1 ++// CHECK_BDVER4_M32: #define __MMX__ 1 ++// CHECK_BDVER4_M32: #define __MOVBE__ 1 ++// CHECK_BDVER4_M32: #define __PCLMUL__ 1 ++// CHECK_BDVER4_M32: #define __POPCNT__ 1 ++// CHECK_BDVER4_M32: #define __PRFCHW__ 1 ++// CHECK_BDVER4_M32: #define __RDRND__ 1 ++// CHECK_BDVER4_M32: #define __SSE2_MATH__ 1 ++// CHECK_BDVER4_M32: #define __SSE2__ 1 ++// CHECK_BDVER4_M32: #define __SSE3__ 1 ++// CHECK_BDVER4_M32: #define __SSE4A__ 1 ++// CHECK_BDVER4_M32: #define __SSE4_1__ 1 ++// CHECK_BDVER4_M32: #define __SSE4_2__ 1 ++// CHECK_BDVER4_M32: #define __SSE_MATH__ 1 ++// CHECK_BDVER4_M32: #define __SSE__ 1 ++// CHECK_BDVER4_M32: #define __SSSE3__ 1 ++// CHECK_BDVER4_M32: #define __TBM__ 1 ++// CHECK_BDVER4_M32: #define __XOP__ 1 ++// CHECK_BDVER4_M32: #define __XSAVE__ 1 ++// CHECK_BDVER4_M32: #define __bdver4 1 ++// CHECK_BDVER4_M32: #define __bdver4__ 1 ++// CHECK_BDVER4_M32: #define __i386 1 ++// CHECK_BDVER4_M32: #define __i386__ 1 ++// CHECK_BDVER4_M32: #define __tune_bdver4__ 1 ++ ++// RUN: %clang -march=bdver4 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER4_M64 ++// CHECK_BDVER4_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_BDVER4_M64-NOT: #define __3dNOW__ 1 ++// CHECK_BDVER4_M64: #define __AES__ 1 ++// CHECK_BDVER4_M64: #define __AVX2__ 1 ++// CHECK_BDVER4_M64: #define __AVX__ 1 ++// CHECK_BDVER4_M64: #define __BMI2__ 1 ++// CHECK_BDVER4_M64: #define __BMI__ 1 ++// CHECK_BDVER4_M64: #define __F16C__ 1 ++// CHECK_BDVER4_M64: #define __FMA4__ 1 ++// CHECK_BDVER4_M64: #define __FMA__ 1 ++// CHECK_BDVER4_M64: #define __FSGSBASE__ 1 ++// CHECK_BDVER4_M64: #define __LWP__ 1 ++// CHECK_BDVER4_M64: #define __LZCNT__ 1 ++// CHECK_BDVER4_M64: #define __MMX__ 1 ++// CHECK_BDVER4_M64: #define __MOVBE__ 1 ++// CHECK_BDVER4_M64: #define __PCLMUL__ 1 ++// CHECK_BDVER4_M64: #define __POPCNT__ 1 ++// CHECK_BDVER4_M64: #define __PRFCHW__ 1 ++// CHECK_BDVER4_M64: #define __RDRND__ 1 ++// CHECK_BDVER4_M64: #define __SSE2_MATH__ 1 ++// CHECK_BDVER4_M64: #define __SSE2__ 1 ++// CHECK_BDVER4_M64: #define __SSE3__ 1 ++// CHECK_BDVER4_M64: #define __SSE4A__ 1 ++// CHECK_BDVER4_M64: #define __SSE4_1__ 1 ++// CHECK_BDVER4_M64: #define __SSE4_2__ 1 ++// CHECK_BDVER4_M64: #define __SSE_MATH__ 1 ++// CHECK_BDVER4_M64: #define __SSE__ 1 ++// CHECK_BDVER4_M64: #define __SSSE3__ 1 ++// CHECK_BDVER4_M64: #define __TBM__ 1 ++// CHECK_BDVER4_M64: #define __XOP__ 1 ++// CHECK_BDVER4_M64: #define __XSAVE__ 1 ++// CHECK_BDVER4_M64: #define __amd64 1 ++// CHECK_BDVER4_M64: #define __amd64__ 1 ++// CHECK_BDVER4_M64: #define __bdver4 1 ++// CHECK_BDVER4_M64: #define __bdver4__ 1 ++// CHECK_BDVER4_M64: #define __tune_bdver4__ 1 ++// CHECK_BDVER4_M64: #define __x86_64 1 ++// CHECK_BDVER4_M64: #define __x86_64__ 1 ++ ++// RUN: %clang -march=znver1 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck 
-match-full-lines %s -check-prefix=CHECK_ZNVER1_M32 ++// CHECK_ZNVER1_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_ZNVER1_M32-NOT: #define __3dNOW__ 1 ++// CHECK_ZNVER1_M32: #define __ADX__ 1 ++// CHECK_ZNVER1_M32: #define __AES__ 1 ++// CHECK_ZNVER1_M32: #define __AVX2__ 1 ++// CHECK_ZNVER1_M32: #define __AVX__ 1 ++// CHECK_ZNVER1_M32: #define __BMI2__ 1 ++// CHECK_ZNVER1_M32: #define __BMI__ 1 ++// CHECK_ZNVER1_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_ZNVER1_M32: #define __CLZERO__ 1 ++// CHECK_ZNVER1_M32: #define __F16C__ 1 ++// CHECK_ZNVER1_M32-NOT: #define __FMA4__ 1 ++// CHECK_ZNVER1_M32: #define __FMA__ 1 ++// CHECK_ZNVER1_M32: #define __FSGSBASE__ 1 ++// CHECK_ZNVER1_M32: #define __LZCNT__ 1 ++// CHECK_ZNVER1_M32: #define __MMX__ 1 ++// CHECK_ZNVER1_M32: #define __MOVBE__ 1 ++// CHECK_ZNVER1_M32: #define __PCLMUL__ 1 ++// CHECK_ZNVER1_M32: #define __POPCNT__ 1 ++// CHECK_ZNVER1_M32: #define __PRFCHW__ 1 ++// CHECK_ZNVER1_M32: #define __RDRND__ 1 ++// CHECK_ZNVER1_M32: #define __RDSEED__ 1 ++// CHECK_ZNVER1_M32: #define __SHA__ 1 ++// CHECK_ZNVER1_M32: #define __SSE2_MATH__ 1 ++// CHECK_ZNVER1_M32: #define __SSE2__ 1 ++// CHECK_ZNVER1_M32: #define __SSE3__ 1 ++// CHECK_ZNVER1_M32: #define __SSE4A__ 1 ++// CHECK_ZNVER1_M32: #define __SSE4_1__ 1 ++// CHECK_ZNVER1_M32: #define __SSE4_2__ 1 ++// CHECK_ZNVER1_M32: #define __SSE_MATH__ 1 ++// CHECK_ZNVER1_M32: #define __SSE__ 1 ++// CHECK_ZNVER1_M32: #define __SSSE3__ 1 ++// CHECK_ZNVER1_M32-NOT: #define __TBM__ 1 ++// CHECK_ZNVER1_M32-NOT: #define __XOP__ 1 ++// CHECK_ZNVER1_M32: #define __XSAVEC__ 1 ++// CHECK_ZNVER1_M32: #define __XSAVEOPT__ 1 ++// CHECK_ZNVER1_M32: #define __XSAVES__ 1 ++// CHECK_ZNVER1_M32: #define __XSAVE__ 1 ++// CHECK_ZNVER1_M32: #define __i386 1 ++// CHECK_ZNVER1_M32: #define __i386__ 1 ++// CHECK_ZNVER1_M32: #define __tune_znver1__ 1 ++// CHECK_ZNVER1_M32: #define __znver1 1 ++// CHECK_ZNVER1_M32: #define __znver1__ 1 ++ ++// RUN: %clang -march=znver1 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER1_M64 ++// CHECK_ZNVER1_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_ZNVER1_M64-NOT: #define __3dNOW__ 1 ++// CHECK_ZNVER1_M64: #define __ADX__ 1 ++// CHECK_ZNVER1_M64: #define __AES__ 1 ++// CHECK_ZNVER1_M64: #define __AVX2__ 1 ++// CHECK_ZNVER1_M64: #define __AVX__ 1 ++// CHECK_ZNVER1_M64: #define __BMI2__ 1 ++// CHECK_ZNVER1_M64: #define __BMI__ 1 ++// CHECK_ZNVER1_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_ZNVER1_M64: #define __CLZERO__ 1 ++// CHECK_ZNVER1_M64: #define __F16C__ 1 ++// CHECK_ZNVER1_M64-NOT: #define __FMA4__ 1 ++// CHECK_ZNVER1_M64: #define __FMA__ 1 ++// CHECK_ZNVER1_M64: #define __FSGSBASE__ 1 ++// CHECK_ZNVER1_M64: #define __LAHF_SAHF__ 1 ++// CHECK_ZNVER1_M64: #define __LZCNT__ 1 ++// CHECK_ZNVER1_M64: #define __MMX__ 1 ++// CHECK_ZNVER1_M64: #define __MOVBE__ 1 ++// CHECK_ZNVER1_M64: #define __PCLMUL__ 1 ++// CHECK_ZNVER1_M64: #define __POPCNT__ 1 ++// CHECK_ZNVER1_M64: #define __PRFCHW__ 1 ++// CHECK_ZNVER1_M64: #define __RDRND__ 1 ++// CHECK_ZNVER1_M64: #define __RDSEED__ 1 ++// CHECK_ZNVER1_M64: #define __SHA__ 1 ++// CHECK_ZNVER1_M64: #define __SSE2_MATH__ 1 ++// CHECK_ZNVER1_M64: #define __SSE2__ 1 ++// CHECK_ZNVER1_M64: #define __SSE3__ 1 ++// CHECK_ZNVER1_M64: #define __SSE4A__ 1 ++// CHECK_ZNVER1_M64: #define __SSE4_1__ 1 ++// CHECK_ZNVER1_M64: #define __SSE4_2__ 1 ++// CHECK_ZNVER1_M64: #define __SSE_MATH__ 1 ++// CHECK_ZNVER1_M64: #define __SSE__ 1 ++// CHECK_ZNVER1_M64: #define __SSSE3__ 1 ++// 
CHECK_ZNVER1_M64-NOT: #define __TBM__ 1 ++// CHECK_ZNVER1_M64-NOT: #define __XOP__ 1 ++// CHECK_ZNVER1_M64: #define __XSAVEC__ 1 ++// CHECK_ZNVER1_M64: #define __XSAVEOPT__ 1 ++// CHECK_ZNVER1_M64: #define __XSAVES__ 1 ++// CHECK_ZNVER1_M64: #define __XSAVE__ 1 ++// CHECK_ZNVER1_M64: #define __amd64 1 ++// CHECK_ZNVER1_M64: #define __amd64__ 1 ++// CHECK_ZNVER1_M64: #define __tune_znver1__ 1 ++// CHECK_ZNVER1_M64: #define __x86_64 1 ++// CHECK_ZNVER1_M64: #define __x86_64__ 1 ++// CHECK_ZNVER1_M64: #define __znver1 1 ++// CHECK_ZNVER1_M64: #define __znver1__ 1 ++ ++// RUN: %clang -march=znver2 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M32 ++// CHECK_ZNVER2_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_ZNVER2_M32-NOT: #define __3dNOW__ 1 ++// CHECK_ZNVER2_M32: #define __ADX__ 1 ++// CHECK_ZNVER2_M32: #define __AES__ 1 ++// CHECK_ZNVER2_M32: #define __AVX2__ 1 ++// CHECK_ZNVER2_M32: #define __AVX__ 1 ++// CHECK_ZNVER2_M32: #define __BMI2__ 1 ++// CHECK_ZNVER2_M32: #define __BMI__ 1 ++// CHECK_ZNVER2_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_ZNVER2_M32: #define __CLWB__ 1 ++// CHECK_ZNVER2_M32: #define __CLZERO__ 1 ++// CHECK_ZNVER2_M32: #define __F16C__ 1 ++// CHECK_ZNVER2_M32-NOT: #define __FMA4__ 1 ++// CHECK_ZNVER2_M32: #define __FMA__ 1 ++// CHECK_ZNVER2_M32: #define __FSGSBASE__ 1 ++// CHECK_ZNVER2_M32: #define __LAHF_SAHF__ 1 ++// CHECK_ZNVER2_M32: #define __LZCNT__ 1 ++// CHECK_ZNVER2_M32: #define __MMX__ 1 ++// CHECK_ZNVER2_M32: #define __PCLMUL__ 1 ++// CHECK_ZNVER2_M32: #define __POPCNT__ 1 ++// CHECK_ZNVER2_M32: #define __PRFCHW__ 1 ++// CHECK_ZNVER2_M32: #define __RDPID__ 1 ++// CHECK_ZNVER2_M32: #define __RDRND__ 1 ++// CHECK_ZNVER2_M32: #define __RDSEED__ 1 ++// CHECK_ZNVER2_M32: #define __SHA__ 1 ++// CHECK_ZNVER2_M32: #define __SSE2_MATH__ 1 ++// CHECK_ZNVER2_M32: #define __SSE2__ 1 ++// CHECK_ZNVER2_M32: #define __SSE3__ 1 ++// CHECK_ZNVER2_M32: #define __SSE4A__ 1 ++// CHECK_ZNVER2_M32: #define __SSE4_1__ 1 ++// CHECK_ZNVER2_M32: #define __SSE4_2__ 1 ++// CHECK_ZNVER2_M32: #define __SSE_MATH__ 1 ++// CHECK_ZNVER2_M32: #define __SSE__ 1 ++// CHECK_ZNVER2_M32: #define __SSSE3__ 1 ++// CHECK_ZNVER2_M32-NOT: #define __TBM__ 1 ++// CHECK_ZNVER2_M32: #define __WBNOINVD__ 1 ++// CHECK_ZNVER2_M32-NOT: #define __XOP__ 1 ++// CHECK_ZNVER2_M32: #define __XSAVEC__ 1 ++// CHECK_ZNVER2_M32: #define __XSAVEOPT__ 1 ++// CHECK_ZNVER2_M32: #define __XSAVES__ 1 ++// CHECK_ZNVER2_M32: #define __XSAVE__ 1 ++// CHECK_ZNVER2_M32: #define __i386 1 ++// CHECK_ZNVER2_M32: #define __i386__ 1 ++// CHECK_ZNVER2_M32: #define __tune_znver2__ 1 ++// CHECK_ZNVER2_M32: #define __znver2 1 ++// CHECK_ZNVER2_M32: #define __znver2__ 1 ++ ++// RUN: %clang -march=znver2 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M64 ++// CHECK_ZNVER2_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_ZNVER2_M64-NOT: #define __3dNOW__ 1 ++// CHECK_ZNVER2_M64: #define __ADX__ 1 ++// CHECK_ZNVER2_M64: #define __AES__ 1 ++// CHECK_ZNVER2_M64: #define __AVX2__ 1 ++// CHECK_ZNVER2_M64: #define __AVX__ 1 ++// CHECK_ZNVER2_M64: #define __BMI2__ 1 ++// CHECK_ZNVER2_M64: #define __BMI__ 1 ++// CHECK_ZNVER2_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_ZNVER2_M64: #define __CLWB__ 1 ++// CHECK_ZNVER2_M64: #define __CLZERO__ 1 ++// CHECK_ZNVER2_M64: #define __F16C__ 1 ++// CHECK_ZNVER2_M64-NOT: #define __FMA4__ 1 ++// CHECK_ZNVER2_M64: #define __FMA__ 1 ++// CHECK_ZNVER2_M64: 
#define __FSGSBASE__ 1 ++// CHECK_ZNVER2_M64: #define __LAHF_SAHF__ 1 ++// CHECK_ZNVER2_M64: #define __LZCNT__ 1 ++// CHECK_ZNVER2_M64: #define __MMX__ 1 ++// CHECK_ZNVER2_M64: #define __PCLMUL__ 1 ++// CHECK_ZNVER2_M64: #define __POPCNT__ 1 ++// CHECK_ZNVER2_M64: #define __PRFCHW__ 1 ++// CHECK_ZNVER2_M64: #define __RDPID__ 1 ++// CHECK_ZNVER2_M64: #define __RDRND__ 1 ++// CHECK_ZNVER2_M64: #define __RDSEED__ 1 ++// CHECK_ZNVER2_M64: #define __SHA__ 1 ++// CHECK_ZNVER2_M64: #define __SSE2_MATH__ 1 ++// CHECK_ZNVER2_M64: #define __SSE2__ 1 ++// CHECK_ZNVER2_M64: #define __SSE3__ 1 ++// CHECK_ZNVER2_M64: #define __SSE4A__ 1 ++// CHECK_ZNVER2_M64: #define __SSE4_1__ 1 ++// CHECK_ZNVER2_M64: #define __SSE4_2__ 1 ++// CHECK_ZNVER2_M64: #define __SSE_MATH__ 1 ++// CHECK_ZNVER2_M64: #define __SSE__ 1 ++// CHECK_ZNVER2_M64: #define __SSSE3__ 1 ++// CHECK_ZNVER2_M64-NOT: #define __TBM__ 1 ++// CHECK_ZNVER2_M64: #define __WBNOINVD__ 1 ++// CHECK_ZNVER2_M64-NOT: #define __XOP__ 1 ++// CHECK_ZNVER2_M64: #define __XSAVEC__ 1 ++// CHECK_ZNVER2_M64: #define __XSAVEOPT__ 1 ++// CHECK_ZNVER2_M64: #define __XSAVES__ 1 ++// CHECK_ZNVER2_M64: #define __XSAVE__ 1 ++// CHECK_ZNVER2_M64: #define __amd64 1 ++// CHECK_ZNVER2_M64: #define __amd64__ 1 ++// CHECK_ZNVER2_M64: #define __tune_znver2__ 1 ++// CHECK_ZNVER2_M64: #define __x86_64 1 ++// CHECK_ZNVER2_M64: #define __x86_64__ 1 ++// CHECK_ZNVER2_M64: #define __znver2 1 ++// CHECK_ZNVER2_M64: #define __znver2__ 1 ++ ++// RUN: %clang -march=znver3 -m32 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER3_M32 ++// CHECK_ZNVER3_M32-NOT: #define __3dNOW_A__ 1 ++// CHECK_ZNVER3_M32-NOT: #define __3dNOW__ 1 ++// CHECK_ZNVER3_M32: #define __ADX__ 1 ++// CHECK_ZNVER3_M32: #define __AES__ 1 ++// CHECK_ZNVER3_M32: #define __AVX2__ 1 ++// CHECK_ZNVER3_M32: #define __AVX__ 1 ++// CHECK_ZNVER3_M32: #define __BMI2__ 1 ++// CHECK_ZNVER3_M32: #define __BMI__ 1 ++// CHECK_ZNVER3_M32: #define __CLFLUSHOPT__ 1 ++// CHECK_ZNVER3_M32: #define __CLWB__ 1 ++// CHECK_ZNVER3_M32: #define __CLZERO__ 1 ++// CHECK_ZNVER3_M32: #define __F16C__ 1 ++// CHECK_ZNVER3_M32-NOT: #define __FMA4__ 1 ++// CHECK_ZNVER3_M32: #define __FMA__ 1 ++// CHECK_ZNVER3_M32: #define __FSGSBASE__ 1 ++// CHECK_ZNVER3_M32: #define __LZCNT__ 1 ++// CHECK_ZNVER3_M32: #define __MMX__ 1 ++// CHECK_ZNVER3_M32: #define __PCLMUL__ 1 ++// CHECK_ZNVER3_M32: #define __PKU__ 1 ++// CHECK_ZNVER3_M32: #define __POPCNT__ 1 ++// CHECK_ZNVER3_M32: #define __PRFCHW__ 1 ++// CHECK_ZNVER3_M32: #define __RDPID__ 1 ++// CHECK_ZNVER3_M32: #define __RDRND__ 1 ++// CHECK_ZNVER3_M32: #define __RDSEED__ 1 ++// CHECK_ZNVER3_M32: #define __SHA__ 1 ++// CHECK_ZNVER3_M32: #define __SSE2_MATH__ 1 ++// CHECK_ZNVER3_M32: #define __SSE2__ 1 ++// CHECK_ZNVER3_M32: #define __SSE3__ 1 ++// CHECK_ZNVER3_M32: #define __SSE4A__ 1 ++// CHECK_ZNVER3_M32: #define __SSE4_1__ 1 ++// CHECK_ZNVER3_M32: #define __SSE4_2__ 1 ++// CHECK_ZNVER3_M32: #define __SSE_MATH__ 1 ++// CHECK_ZNVER3_M32: #define __SSE__ 1 ++// CHECK_ZNVER3_M32: #define __SSSE3__ 1 ++// CHECK_ZNVER3_M32-NOT: #define __TBM__ 1 ++// CHECK_ZNVER3_M32: #define __WBNOINVD__ 1 ++// CHECK_ZNVER3_M32-NOT: #define __XOP__ 1 ++// CHECK_ZNVER3_M32: #define __XSAVEC__ 1 ++// CHECK_ZNVER3_M32: #define __XSAVEOPT__ 1 ++// CHECK_ZNVER3_M32: #define __XSAVES__ 1 ++// CHECK_ZNVER3_M32: #define __XSAVE__ 1 ++// CHECK_ZNVER3_M32: #define __i386 1 ++// CHECK_ZNVER3_M32: #define __i386__ 1 ++// CHECK_ZNVER3_M32: #define 
__tune_znver3__ 1 ++// CHECK_ZNVER3_M32: #define __znver3 1 ++// CHECK_ZNVER3_M32: #define __znver3__ 1 ++ ++// RUN: %clang -march=znver3 -m64 -E -dM %s -o - 2>&1 \ ++// RUN: -target i386-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER3_M64 ++// CHECK_ZNVER3_M64-NOT: #define __3dNOW_A__ 1 ++// CHECK_ZNVER3_M64-NOT: #define __3dNOW__ 1 ++// CHECK_ZNVER3_M64: #define __ADX__ 1 ++// CHECK_ZNVER3_M64: #define __AES__ 1 ++// CHECK_ZNVER3_M64: #define __AVX2__ 1 ++// CHECK_ZNVER3_M64: #define __AVX__ 1 ++// CHECK_ZNVER3_M64: #define __BMI2__ 1 ++// CHECK_ZNVER3_M64: #define __BMI__ 1 ++// CHECK_ZNVER3_M64: #define __CLFLUSHOPT__ 1 ++// CHECK_ZNVER3_M64: #define __CLWB__ 1 ++// CHECK_ZNVER3_M64: #define __CLZERO__ 1 ++// CHECK_ZNVER3_M64: #define __F16C__ 1 ++// CHECK_ZNVER3_M64-NOT: #define __FMA4__ 1 ++// CHECK_ZNVER3_M64: #define __FMA__ 1 ++// CHECK_ZNVER3_M64: #define __FSGSBASE__ 1 ++// CHECK_ZNVER3_M64: #define __LZCNT__ 1 ++// CHECK_ZNVER3_M64: #define __MMX__ 1 ++// CHECK_ZNVER3_M64: #define __PCLMUL__ 1 ++// CHECK_ZNVER3_M64: #define __PKU__ 1 ++// CHECK_ZNVER3_M64: #define __POPCNT__ 1 ++// CHECK_ZNVER3_M64: #define __PRFCHW__ 1 ++// CHECK_ZNVER3_M64: #define __RDPID__ 1 ++// CHECK_ZNVER3_M64: #define __RDRND__ 1 ++// CHECK_ZNVER3_M64: #define __RDSEED__ 1 ++// CHECK_ZNVER3_M64: #define __SHA__ 1 ++// CHECK_ZNVER3_M64: #define __SSE2_MATH__ 1 ++// CHECK_ZNVER3_M64: #define __SSE2__ 1 ++// CHECK_ZNVER3_M64: #define __SSE3__ 1 ++// CHECK_ZNVER3_M64: #define __SSE4A__ 1 ++// CHECK_ZNVER3_M64: #define __SSE4_1__ 1 ++// CHECK_ZNVER3_M64: #define __SSE4_2__ 1 ++// CHECK_ZNVER3_M64: #define __SSE_MATH__ 1 ++// CHECK_ZNVER3_M64: #define __SSE__ 1 ++// CHECK_ZNVER3_M64: #define __SSSE3__ 1 ++// CHECK_ZNVER3_M64-NOT: #define __TBM__ 1 ++// CHECK_ZNVER3_M64: #define __VAES__ 1 ++// CHECK_ZNVER3_M64: #define __VPCLMULQDQ__ 1 ++// CHECK_ZNVER3_M64: #define __WBNOINVD__ 1 ++// CHECK_ZNVER3_M64-NOT: #define __XOP__ 1 ++// CHECK_ZNVER3_M64: #define __XSAVEC__ 1 ++// CHECK_ZNVER3_M64: #define __XSAVEOPT__ 1 ++// CHECK_ZNVER3_M64: #define __XSAVES__ 1 ++// CHECK_ZNVER3_M64: #define __XSAVE__ 1 ++// CHECK_ZNVER3_M64: #define __amd64 1 ++// CHECK_ZNVER3_M64: #define __amd64__ 1 ++// CHECK_ZNVER3_M64: #define __tune_znver3__ 1 ++// CHECK_ZNVER3_M64: #define __x86_64 1 ++// CHECK_ZNVER3_M64: #define __x86_64__ 1 ++// CHECK_ZNVER3_M64: #define __znver3 1 ++// CHECK_ZNVER3_M64: #define __znver3__ 1 ++ ++// End X86/GCC/Linux tests ------------------ ++ ++// Begin PPC/GCC/Linux tests ---------------- ++// Check that VSX also turns on altivec. 
++// RUN: %clang -mvsx -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_VSX_M32 ++// CHECK_PPC_VSX_M32: #define __ALTIVEC__ 1 ++// CHECK_PPC_VSX_M32: #define __VSX__ 1 ++ ++// RUN: %clang -mvsx -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_VSX_M64 ++// CHECK_PPC_VSX_M64: #define __VSX__ 1 ++ ++// RUN: %clang -mpower8-vector -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_POWER8_VECTOR_M64 ++// CHECK_PPC_POWER8_VECTOR_M64: #define __POWER8_VECTOR__ 1 ++ ++// RUN: %clang -mpower9-vector -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_POWER9_VECTOR_M64 ++// CHECK_PPC_POWER9_VECTOR_M64: #define __POWER9_VECTOR__ 1 ++ ++// RUN: %clang -mcrypto -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_CRYPTO_M64 ++// CHECK_PPC_CRYPTO_M64: #define __CRYPTO__ 1 ++ ++// HTM is available on power8 or later which includes all of powerpc64le as an ++// ABI choice. Test that, the cpus, and the option. ++// RUN: %clang -mhtm -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_HTM ++// RUN: %clang -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64le-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_HTM ++// RUN: %clang -mcpu=pwr8 -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_HTM ++// RUN: %clang -mcpu=pwr9 -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_HTM ++// CHECK_PPC_HTM: #define __HTM__ 1 ++ ++// RUN: %clang -mcpu=ppc64 -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-unknown \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_GCC_ATOMICS ++// RUN: %clang -mcpu=pwr8 -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64-unknown-unknown \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_GCC_ATOMICS ++// RUN: %clang -E -dM %s -o - 2>&1 \ ++// RUN: -target powerpc64le-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_PPC_GCC_ATOMICS ++// CHECK_PPC_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_PPC_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_PPC_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_PPC_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++ ++// End PPC/GCC/Linux tests ------------------ ++ ++// Begin Sparc/GCC/Linux tests ---------------- ++ ++// RUN: %clang -E -dM %s -o - 2>&1 \ ++// RUN: -target sparc-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARC ++// CHECK_SPARC: #define __BIG_ENDIAN__ 1 ++// CHECK_SPARC: #define __sparc 1 ++// CHECK_SPARC: #define __sparc__ 1 ++// CHECK_SPARC-NOT: #define __sparcv9 1 ++// CHECK_SPARC-NOT: #define __sparcv9__ 1 ++// CHECK_SPARC: #define __sparcv8 1 ++// CHECK_SPARC-NOT: #define __sparcv9 1 ++// CHECK_SPARC-NOT: #define __sparcv9__ 1 ++ ++// RUN: %clang -mcpu=v9 -E -dM %s -o - 2>&1 \ ++// RUN: -target sparc-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARC-V9 
++// CHECK_SPARC-V9-NOT: #define __sparcv8 1 ++// CHECK_SPARC-V9-NOT: #define __sparcv8__ 1 ++// CHECK_SPARC-V9: #define __sparc_v9__ 1 ++// CHECK_SPARC-V9: #define __sparcv9 1 ++// CHECK_SPARC-V9: #define __sparcv9__ 1 ++ ++// RUN: %clang -E -dM %s -o - 2>&1 \ ++// RUN: -target sparc-sun-solaris \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARC_SOLARIS_GCC_ATOMICS ++// CHECK_SPARC_SOLARIS_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SPARC_SOLARIS_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SPARC_SOLARIS_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SPARC_SOLARIS_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++ ++// RUN: %clang -mcpu=v8 -E -dM %s -o - 2>&1 \ ++// RUN: -target sparc-sun-solaris \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8 ++// CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8-NOT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8-NOT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8-NOT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SPARC_SOLARIS_GCC_ATOMICS-V8-NOT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++ ++// RUN: %clang -E -dM %s -o - 2>&1 \ ++// RUN: -target sparcel-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARCEL ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=myriad2 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-2 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=myriad2.1 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-1 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=myriad2.2 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-2 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=myriad2.3 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-3 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2100 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-1 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2150 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-2 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2155 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-2 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2450 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-2 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2455 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-2 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2x5x 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-2 \ ++// RUN: 
-check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2080 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-3 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2085 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-3 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2480 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-3 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2485 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-3 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// RUN: %clang -E -dM %s -o - -target sparcel-myriad -mcpu=ma2x8x 2>&1 \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_MYRIAD2-3 \ ++// RUN: -check-prefix=CHECK_SPARCEL -check-prefix=CHECK_MYRIAD2 ++// CHECK_SPARCEL: #define __LITTLE_ENDIAN__ 1 ++// CHECK_MYRIAD2: #define __leon__ 1 ++// CHECK_MYRIAD2-1: #define __myriad2 1 ++// CHECK_MYRIAD2-1: #define __myriad2__ 1 ++// CHECK_MYRIAD2-2: #define __ma2x5x 1 ++// CHECK_MYRIAD2-2: #define __ma2x5x__ 1 ++// CHECK_MYRIAD2-2: #define __myriad2 2 ++// CHECK_MYRIAD2-2: #define __myriad2__ 2 ++// CHECK_MYRIAD2-3: #define __ma2x8x 1 ++// CHECK_MYRIAD2-3: #define __ma2x8x__ 1 ++// CHECK_MYRIAD2-3: #define __myriad2 3 ++// CHECK_MYRIAD2-3: #define __myriad2__ 3 ++// CHECK_SPARCEL: #define __sparc 1 ++// CHECK_SPARCEL: #define __sparc__ 1 ++// CHECK_MYRIAD2: #define __sparc_v8__ 1 ++// CHECK_SPARCEL: #define __sparcv8 1 ++ ++// RUN: %clang -E -dM %s -o - 2>&1 \ ++// RUN: -target sparcv9-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARCV9 ++// CHECK_SPARCV9: #define __BIG_ENDIAN__ 1 ++// CHECK_SPARCV9: #define __sparc 1 ++// CHECK_SPARCV9: #define __sparc64__ 1 ++// CHECK_SPARCV9: #define __sparc__ 1 ++// CHECK_SPARCV9: #define __sparc_v9__ 1 ++// CHECK_SPARCV9: #define __sparcv9 1 ++// CHECK_SPARCV9: #define __sparcv9__ 1 ++ ++// RUN: %clang -E -dM %s -o - 2>&1 \ ++// RUN: -target sparcv9-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SPARCV9_GCC_ATOMICS ++// CHECK_SPARCV9_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SPARCV9_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SPARCV9_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SPARCV9_GCC_ATOMICS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++ ++// Begin SystemZ/GCC/Linux tests ---------------- ++ ++// RUN: %clang -march=arch8 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH8 ++// RUN: %clang -march=z10 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH8 ++// CHECK_SYSTEMZ_ARCH8: #define __ARCH__ 8 ++// CHECK_SYSTEMZ_ARCH8: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SYSTEMZ_ARCH8: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SYSTEMZ_ARCH8: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SYSTEMZ_ARCH8: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++// CHECK_SYSTEMZ_ARCH8: #define __LONG_DOUBLE_128__ 1 ++// CHECK_SYSTEMZ_ARCH8: #define __s390__ 1 
++// CHECK_SYSTEMZ_ARCH8: #define __s390x__ 1 ++// CHECK_SYSTEMZ_ARCH8: #define __zarch__ 1 ++ ++// RUN: %clang -march=arch9 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH9 ++// RUN: %clang -march=z196 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH9 ++// CHECK_SYSTEMZ_ARCH9: #define __ARCH__ 9 ++// CHECK_SYSTEMZ_ARCH9: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SYSTEMZ_ARCH9: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SYSTEMZ_ARCH9: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SYSTEMZ_ARCH9: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++// CHECK_SYSTEMZ_ARCH9: #define __LONG_DOUBLE_128__ 1 ++// CHECK_SYSTEMZ_ARCH9: #define __s390__ 1 ++// CHECK_SYSTEMZ_ARCH9: #define __s390x__ 1 ++// CHECK_SYSTEMZ_ARCH9: #define __zarch__ 1 ++ ++// RUN: %clang -march=arch10 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH10 ++// RUN: %clang -march=zEC12 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH10 ++// CHECK_SYSTEMZ_ARCH10: #define __ARCH__ 10 ++// CHECK_SYSTEMZ_ARCH10: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SYSTEMZ_ARCH10: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SYSTEMZ_ARCH10: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SYSTEMZ_ARCH10: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++// CHECK_SYSTEMZ_ARCH10: #define __HTM__ 1 ++// CHECK_SYSTEMZ_ARCH10: #define __LONG_DOUBLE_128__ 1 ++// CHECK_SYSTEMZ_ARCH10: #define __s390__ 1 ++// CHECK_SYSTEMZ_ARCH10: #define __s390x__ 1 ++// CHECK_SYSTEMZ_ARCH10: #define __zarch__ 1 ++ ++// RUN: %clang -march=arch11 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH11 ++// RUN: %clang -march=z13 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH11 ++// CHECK_SYSTEMZ_ARCH11: #define __ARCH__ 11 ++// CHECK_SYSTEMZ_ARCH11: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SYSTEMZ_ARCH11: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SYSTEMZ_ARCH11: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SYSTEMZ_ARCH11: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++// CHECK_SYSTEMZ_ARCH11: #define __HTM__ 1 ++// CHECK_SYSTEMZ_ARCH11: #define __LONG_DOUBLE_128__ 1 ++// CHECK_SYSTEMZ_ARCH11: #define __VX__ 1 ++// CHECK_SYSTEMZ_ARCH11: #define __s390__ 1 ++// CHECK_SYSTEMZ_ARCH11: #define __s390x__ 1 ++// CHECK_SYSTEMZ_ARCH11: #define __zarch__ 1 ++ ++// RUN: %clang -march=arch12 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH12 ++// RUN: %clang -march=z14 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH12 ++// CHECK_SYSTEMZ_ARCH12: #define __ARCH__ 12 ++// CHECK_SYSTEMZ_ARCH12: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SYSTEMZ_ARCH12: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SYSTEMZ_ARCH12: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SYSTEMZ_ARCH12: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++// CHECK_SYSTEMZ_ARCH12: #define 
__HTM__ 1 ++// CHECK_SYSTEMZ_ARCH12: #define __LONG_DOUBLE_128__ 1 ++// CHECK_SYSTEMZ_ARCH12: #define __VX__ 1 ++// CHECK_SYSTEMZ_ARCH12: #define __s390__ 1 ++// CHECK_SYSTEMZ_ARCH12: #define __s390x__ 1 ++// CHECK_SYSTEMZ_ARCH12: #define __zarch__ 1 ++ ++// RUN: %clang -march=arch13 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH13 ++// RUN: %clang -march=z15 -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH13 ++// CHECK_SYSTEMZ_ARCH13: #define __ARCH__ 13 ++// CHECK_SYSTEMZ_ARCH13: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 ++// CHECK_SYSTEMZ_ARCH13: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 ++// CHECK_SYSTEMZ_ARCH13: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1 ++// CHECK_SYSTEMZ_ARCH13: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 ++// CHECK_SYSTEMZ_ARCH13: #define __HTM__ 1 ++// CHECK_SYSTEMZ_ARCH13: #define __LONG_DOUBLE_128__ 1 ++// CHECK_SYSTEMZ_ARCH13: #define __VX__ 1 ++// CHECK_SYSTEMZ_ARCH13: #define __s390__ 1 ++// CHECK_SYSTEMZ_ARCH13: #define __s390x__ 1 ++// CHECK_SYSTEMZ_ARCH13: #define __zarch__ 1 ++ ++// RUN: %clang -mhtm -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_HTM ++// CHECK_SYSTEMZ_HTM: #define __HTM__ 1 ++ ++// RUN: %clang -mvx -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_VX ++// CHECK_SYSTEMZ_VX: #define __VX__ 1 ++ ++// RUN: %clang -fzvector -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ZVECTOR ++// RUN: %clang -mzvector -E -dM %s -o - 2>&1 \ ++// RUN: -target s390x-unknown-linux \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ZVECTOR ++// CHECK_SYSTEMZ_ZVECTOR: #define __VEC__ 10303 ++ ++// Begin amdgcn tests ---------------- ++ ++// RUN: %clang -march=amdgcn -E -dM %s -o - 2>&1 \ ++// RUN: -target amdgcn-unknown-unknown \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_AMDGCN ++// CHECK_AMDGCN: #define __AMDGCN__ 1 ++// CHECK_AMDGCN: #define __HAS_FMAF__ 1 ++// CHECK_AMDGCN: #define __HAS_FP64__ 1 ++// CHECK_AMDGCN: #define __HAS_LDEXPF__ 1 ++ ++// Begin r600 tests ---------------- ++ ++// RUN: %clang -march=amdgcn -E -dM %s -o - 2>&1 \ ++// RUN: -target r600-unknown-unknown \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_R600 ++// CHECK_R600: #define __R600__ 1 ++// CHECK_R600-NOT: #define __HAS_FMAF__ 1 ++ ++// RUN: %clang -march=amdgcn -mcpu=cypress -E -dM %s -o - 2>&1 \ ++// RUN: -target r600-unknown-unknown \ ++// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_R600_FP64 ++// CHECK_R600_FP64-DAG: #define __R600__ 1 ++// CHECK_R600_FP64-DAG: #define __HAS_FMAF__ 1 diff -Nru llvm-toolchain-12-12.0.0/debian/patches/sparc/sparc-D98575-compiler-rt.patch llvm-toolchain-12-12.0.1/debian/patches/sparc/sparc-D98575-compiler-rt.patch --- llvm-toolchain-12-12.0.0/debian/patches/sparc/sparc-D98575-compiler-rt.patch 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/patches/sparc/sparc-D98575-compiler-rt.patch 2021-06-21 14:43:53.000000000 +0000 @@ -0,0 +1,17 @@ +--- llvm-toolchain-12-12.0.1~+rc1.orig/compiler-rt/cmake/base-config-ix.cmake ++++ llvm-toolchain-12-12.0.1~+rc1/compiler-rt/cmake/base-config-ix.cmake +@@ -189,8 +189,12 @@ 
macro(test_targets) + elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "s390x") + test_target_arch(s390x "" "") + elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "sparc") +- test_target_arch(sparc "" "-m32") +- test_target_arch(sparcv9 "" "-m64") ++ if (CMAKE_SIZEOF_VOID_P EQUAL 4) ++ test_target_arch(sparc "" "-mcpu=v9" "-m32") ++ append("-latomic" CMAKE_LD_FLAGS) ++ else() ++ test_target_arch(sparcv9 "" "-m64") ++ endif() + elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "mipsel|mips64el") + # Gcc doesn't accept -m32/-m64 so we do the next best thing and use + # -mips32r2/-mips64r2. We don't use -mips1/-mips3 because we want to match diff -Nru llvm-toolchain-12-12.0.0/debian/prepare-new-release.sh llvm-toolchain-12-12.0.1/debian/prepare-new-release.sh --- llvm-toolchain-12-12.0.0/debian/prepare-new-release.sh 2021-05-01 14:28:25.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/prepare-new-release.sh 2021-06-23 16:15:31.000000000 +0000 @@ -6,7 +6,7 @@ ORIG_VERSION_3=110 TARGET_VERSION_3=120 -LIST=`ls debian/control debian/orig-tar.sh debian/rules debian/patches/clang-analyzer-force-version.diff debian/patches/clang-format-version.diff debian/patches/python-clangpath.diff debian/patches/scan-build-clang-path.diff debian/patches/lldb-libname.diff debian/patches/fix-scan-view-path.diff debian/patches/lldb/lldb-addversion-suffix-to-llvm-server-exec.patch debian/patches/clang-tidy-run-bin.diff debian/patches/fix-scan-view-path.diff debian/README debian/patches/clang-analyzer-force-version.diff debian/patches/clang-tidy-run-bin.diff debian/tests/control debian/unpack.sh debian/tests/cmake-test` +LIST=`ls debian/control debian/orig-tar.sh debian/rules debian/patches/clang-analyzer-force-version.diff debian/patches/clang-format-version.diff debian/patches/python-clangpath.diff debian/patches/scan-build-clang-path.diff debian/patches/lldb-libname.diff debian/patches/fix-scan-view-path.diff debian/patches/lldb/lldb-addversion-suffix-to-llvm-server-exec.patch debian/patches/clang-tidy-run-bin.diff debian/patches/fix-scan-view-path.diff debian/README debian/patches/clang-analyzer-force-version.diff debian/patches/clang-tidy-run-bin.diff debian/tests/control debian/unpack.sh debian/tests/cmake-test debian/patches/scan-build-py-fix-analyze-path.diff` for F in $LIST; do sed -i -e "s|$ORIG_VERSION_3|$TARGET_VERSION_3|g" $F sed -i -e "s|$ORIG_VERSION_2|$TARGET_VERSION_2|g" $F diff -Nru llvm-toolchain-12-12.0.0/debian/qualify-clang.sh llvm-toolchain-12-12.0.1/debian/qualify-clang.sh --- llvm-toolchain-12-12.0.0/debian/qualify-clang.sh 2021-05-01 14:28:25.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/qualify-clang.sh 2021-05-29 07:40:56.000000000 +0000 @@ -9,7 +9,7 @@ DETAILED_VERSION=$(dpkg-parsechangelog | sed -rne "s,^Version: 1:([0-9.]+)(~|-)(.*),\1\2\3,p") DEB_HOST_ARCH=$(dpkg-architecture -qDEB_HOST_ARCH) -LIST="libomp5-${VERSION}_${DETAILED_VERSION}_amd64.deb libomp-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb lldb-${VERSION}_${DETAILED_VERSION}_amd64.deb python3-lldb-${VERSION}_${DETAILED_VERSION}_amd64.deb libllvm${VERSION}_${DETAILED_VERSION}_amd64.deb llvm-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb liblldb-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libclang1-${VERSION}_${DETAILED_VERSION}_amd64.deb libclang-common-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb llvm-${VERSION}_${DETAILED_VERSION}_amd64.deb liblldb-${VERSION}_${DETAILED_VERSION}_amd64.deb llvm-${VERSION}-runtime_${DETAILED_VERSION}_amd64.deb lld-${VERSION}_${DETAILED_VERSION}_amd64.deb 
libfuzzer-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libclang-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libc++-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libc++abi-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libc++1-${VERSION}_${DETAILED_VERSION}_amd64.deb libc++abi1-${VERSION}_${DETAILED_VERSION}_amd64.deb clang-${VERSION}_${DETAILED_VERSION}_amd64.deb llvm-${VERSION}-tools_${DETAILED_VERSION}_amd64.deb clang-tools-${VERSION}_${DETAILED_VERSION}_amd64.deb clangd-${VERSION}_${DETAILED_VERSION}_amd64.deb libclang-cpp${VERSION}_${DETAILED_VERSION}_amd64.deb clang-tidy-${VERSION}_${DETAILED_VERSION}_amd64.deb libclang-cpp${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libclc-${VERSION}_${DETAILED_VERSION}_all.deb libclc-${VERSION}-dev_${DETAILED_VERSION}_all.deb" +LIST="libomp5-${VERSION}_${DETAILED_VERSION}_amd64.deb libomp-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb lldb-${VERSION}_${DETAILED_VERSION}_amd64.deb python3-lldb-${VERSION}_${DETAILED_VERSION}_amd64.deb libllvm${VERSION}_${DETAILED_VERSION}_amd64.deb llvm-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb liblldb-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libclang1-${VERSION}_${DETAILED_VERSION}_amd64.deb libclang-common-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb llvm-${VERSION}_${DETAILED_VERSION}_amd64.deb liblldb-${VERSION}_${DETAILED_VERSION}_amd64.deb llvm-${VERSION}-runtime_${DETAILED_VERSION}_amd64.deb lld-${VERSION}_${DETAILED_VERSION}_amd64.deb libfuzzer-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libclang-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libc++-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libc++abi-${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libc++1-${VERSION}_${DETAILED_VERSION}_amd64.deb libc++abi1-${VERSION}_${DETAILED_VERSION}_amd64.deb clang-${VERSION}_${DETAILED_VERSION}_amd64.deb llvm-${VERSION}-tools_${DETAILED_VERSION}_amd64.deb clang-tools-${VERSION}_${DETAILED_VERSION}_amd64.deb clangd-${VERSION}_${DETAILED_VERSION}_amd64.deb libclang-cpp${VERSION}_${DETAILED_VERSION}_amd64.deb clang-tidy-${VERSION}_${DETAILED_VERSION}_amd64.deb libclang-cpp${VERSION}-dev_${DETAILED_VERSION}_amd64.deb libclc-${VERSION}_${DETAILED_VERSION}_all.deb libclc-${VERSION}-dev_${DETAILED_VERSION}_all.deb llvm-${VERSION}-linker-tools_${DETAILED_VERSION}_amd64.deb" echo "To install everything:" echo "sudo apt --purge remove 'libomp5-*' 'libc++*dev' 'libc++*' 'python3-lldb-*'" echo "sudo dpkg -i $LIST" diff -Nru llvm-toolchain-12-12.0.0/debian/rules llvm-toolchain-12-12.0.1/debian/rules --- llvm-toolchain-12-12.0.0/debian/rules 2021-05-01 14:28:25.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/rules 2021-06-21 14:39:49.000000000 +0000 @@ -254,10 +254,6 @@ CMAKE_EXTRA += -DLLVM_USE_PERF=yes -DLLVM_ENABLE_LIBPFM=ON endif -# Use thinlto for the link phase -THIN_UNSUPPORTED_DISTRO := stretch xenial trusty -# Don't enable for old distros -ifeq (,$(filter $(DISTRO), $(THIN_UNSUPPORTED_DISTRO))) # Only enable it for archs supporting gold ifneq (,$(filter $(DEB_HOST_ARCH),$(BINUTILS_GOLD_ARCHS))) # armhf is not yet quite ready for Thin, it FTBFS @@ -272,7 +268,6 @@ # CMAKE_EXTRA += -DBOOTSTRAP_LLVM_ENABLE_LTO="Thin" endif endif -endif DH_OPTIONS= OCAML_ENABLE= no @@ -357,12 +352,6 @@ fi; \ fi - # DH doesn't support the [power ...] in install on Ubuntu trusty - # fails with cp: cannot stat 'debian/tmp/[!powerpc': No such file or directory - if test "$(DISTRO)" = "trusty"; then \ - sed -i -e "s|\[\!powerpc \!powerpcspe\] ||g" debian/llvm-$(LLVM_VERSION)-dev.install; \ - fi - # Override this two targets. 
They are trying to manage the .in conversion for me override_dh_ocamlinit: override_dh_ocamlclean: diff -Nru llvm-toolchain-12-12.0.0/debian/unpack.sh llvm-toolchain-12-12.0.1/debian/unpack.sh --- llvm-toolchain-12-12.0.0/debian/unpack.sh 2021-05-01 14:28:25.000000000 +0000 +++ llvm-toolchain-12-12.0.1/debian/unpack.sh 2021-07-09 07:09:40.000000000 +0000 @@ -1,6 +1,6 @@ set -e ORIG_VERSION=12 -MAJOR_VERSION=12.0.0 # 8.0.1 +MAJOR_VERSION=12.0.1 # 8.0.1 REV=`ls -1 *${ORIG_VERSION}_${MAJOR_VERSION}*~+*xz | tail -1|perl -ne 'print "$1\n" if /~\+(.*)\.orig/;' | sort -ru` VERSION=$REV @@ -15,7 +15,7 @@ cd llvm-toolchain-${ORIG_VERSION}_$MAJOR_VERSION~+$VERSION/ VER_FOUND=$(grep "PACKAGE_VERSION " libcxx/CMakeLists.txt|awk '{print $2}'|cut -d\) -f1) -if test "${MAJOR_VERSION}.0.0" != "$VER_FOUND" -a "${MAJOR_VERSION}.0.0git" != "$VER_FOUND"; then +if test "${MAJOR_VERSION}" != "$VER_FOUND" -a "${MAJOR_VERSION}.0.0" != "$VER_FOUND" -a "${MAJOR_VERSION}.0.0git" != "$VER_FOUND"; then echo "Mismatch of version" echo "Expected $MAJOR_VERSION / Found $VER_FOUND" echo "Update unpack.sh" diff -Nru llvm-toolchain-12-12.0.0/.github/workflows/libclang-abi-tests.yml llvm-toolchain-12-12.0.1/.github/workflows/libclang-abi-tests.yml --- llvm-toolchain-12-12.0.0/.github/workflows/libclang-abi-tests.yml 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/.github/workflows/libclang-abi-tests.yml 2021-07-09 07:09:47.000000000 +0000 @@ -41,7 +41,7 @@ remote_repo='https://github.com/llvm/llvm-project' if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then major_version=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) - baseline_ref="$major_version.0.0" + baseline_ref="llvmorg-$major_version.0.0" # If there is a minor release, we want to use that as the base line. minor_ref=`git ls-remote --refs -t $remote_repo llvmorg-$major_version.[1-9].[0-9] | tail -n1 | grep -o 'llvmorg-.\+' || true` @@ -60,7 +60,7 @@ echo ::set-output name=ABI_LIBS::libclang.so else echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - echo ::set-output name=BASELINE_REF::${{ steps.version.outputs.LLVM_VERSION_MAJOR }}.0.0 + echo ::set-output name=BASELINE_REF::llvmorg-${{ steps.version.outputs.LLVM_VERSION_MAJOR }}.0.0 echo ::set-output name=ABI_HEADERS::. echo ::set-output name=ABI_LIBS::libclang.so libclang-cpp.so fi @@ -104,7 +104,7 @@ - name: Configure run: | mkdir install - cmake -B build -S llvm -G Ninja -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm + cmake -B build -S llvm -G Ninja -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g1 -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm - name: Build run: ninja -C build/ ${{ needs.abi-dump-setup.outputs.ABI_LIBS }} install-clang-headers - name: Dump ABI @@ -113,13 +113,12 @@ for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do # Remove symbol versioning from dumps, so we can compare across major versions. 
sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' $lib-${{ matrix.ref }}.abi - tar -czf $lib-${{ matrix.ref }}.abi.tar.gz $lib-${{ matrix.ref }}.abi done - name: Upload ABI file uses: actions/upload-artifact@v2 with: name: ${{ matrix.name }} - path: "*${{ matrix.ref }}.abi.tar.gz" + path: "*${{ matrix.ref }}.abi" abi-compare: runs-on: ubuntu-latest @@ -141,7 +140,7 @@ - name: Compare ABI run: | for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do - abi-compliance-checker -lib $lib -old build-baseline/$lib*.abi.tar.gz -new build-latest/$lib*.abi.tar.gz + abi-compliance-checker -lib $lib -old build-baseline/$lib*.abi -new build-latest/$lib*.abi done - name: Upload ABI Comparison if: always() diff -Nru llvm-toolchain-12-12.0.0/.github/workflows/llvm-tests.yml llvm-toolchain-12-12.0.1/.github/workflows/llvm-tests.yml --- llvm-toolchain-12-12.0.0/.github/workflows/llvm-tests.yml 2021-04-15 05:53:48.000000000 +0000 +++ llvm-toolchain-12-12.0.1/.github/workflows/llvm-tests.yml 2021-07-09 07:09:47.000000000 +0000 @@ -107,7 +107,7 @@ - name: Configure run: | mkdir install - cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm + cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g1 -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm - name: Build # Need to run install-LLVM twice to ensure the symlink is installed (this is a bug). run: | @@ -126,12 +126,11 @@ abi-dumper $EXTRA_ARGS -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o ${{ matrix.ref }}.abi ./install/lib/libLLVM.so # Remove symbol versioning from dumps, so we can compare across major versions. sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi - tar -czf ${{ matrix.ref }}.abi.tar.gz ${{ matrix.ref }}.abi - name: Upload ABI file uses: actions/upload-artifact@v1 with: name: ${{ matrix.name }} - path: ${{ matrix.ref }}.abi.tar.gz + path: ${{ matrix.ref }}.abi - name: Upload symbol list file if: matrix.name == 'build-baseline' @@ -167,7 +166,10 @@ # This option doesn't seem to work with the ABI dumper, so passing it here. export EXTRA_ARGS="-symbols-list symbol-list/llvm.symbols" fi - abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" + # FIXME: Reading of gzip'd abi files on the GitHub runners stop + # working some time in March of 2021, likely due to a change in the + # runner's environment. 
+ abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.abi -new build-latest/*.abi || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" - name: Upload ABI Comparison if: always() uses: actions/upload-artifact@v1 diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/CMakeLists.txt llvm-toolchain-12-12.0.1/integration-test-suite/CMakeLists.txt --- llvm-toolchain-12-12.0.0/integration-test-suite/CMakeLists.txt 2021-04-15 05:53:51.000000000 +0000 +++ llvm-toolchain-12-12.0.1/integration-test-suite/CMakeLists.txt 2021-07-09 07:09:49.000000000 +0000 @@ -19,6 +19,7 @@ find_program_or_warn(CLANG_BINARY clang) find_program_or_warn(CLANG_TIDY_BINARY clang-tidy) find_program_or_warn(CLANG_FORMAT_BINARY clang-format) +find_program_or_warn(CLANG_FORMAT_DIFF_BINARY clang-format-diff) find_program_or_warn(OPT_BINARY opt) find_program_or_warn(LLVMNM llvm-nm) find_program_or_warn(LLVMPROFDATA llvm-profdata) diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/.github/workflows/CI.yml llvm-toolchain-12-12.0.1/integration-test-suite/.github/workflows/CI.yml --- llvm-toolchain-12-12.0.0/integration-test-suite/.github/workflows/CI.yml 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/integration-test-suite/.github/workflows/CI.yml 2021-07-09 07:09:49.000000000 +0000 @@ -0,0 +1,52 @@ +name: Build with different version +on: [push, pull_request] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-18.04, ubuntu-20.04] + version: [10, 11, 12, 13] + steps: + - uses: actions/checkout@v2 + + - name: Set Ubuntu codename (Bionic) + if: ${{ matrix.os == 'ubuntu-18.04'}} + run: echo "UBUNTU_CODENAME=bionic" >> $GITHUB_ENV + - name: Set Ubuntu codename (Focal) + if: ${{ matrix.os == 'ubuntu-20.04'}} + run: echo "UBUNTU_CODENAME=focal" >> $GITHUB_ENV + - name: Install Dependencies + run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + if test ${{ matrix.version }} == 13; then + sudo apt-add-repository "deb http://apt.llvm.org/${{ env.UBUNTU_CODENAME }}/ llvm-toolchain-${{ env.UBUNTU_CODENAME }} main" + else + sudo apt-add-repository "deb http://apt.llvm.org/${{ env.UBUNTU_CODENAME }}/ llvm-toolchain-${{ env.UBUNTU_CODENAME }}-${{ matrix.version }} main" + fi + sudo apt-get update + sudo apt-get install -y clang-${{ matrix.version }} clang-tidy-${{ matrix.version }} clang-format-${{ matrix.version }} clang-tools-${{ matrix.version }} llvm-${{ matrix.version }}-dev lld-${{ matrix.version }} lldb-${{ matrix.version }} llvm-${{ matrix.version }}-tools libomp-${{ matrix.version }}-dev libc++-${{ matrix.version }}-dev libc++abi-${{ matrix.version }}-dev libclang-common-${{ matrix.version }}-dev libclang-${{ matrix.version }}-dev libclang-cpp${{ matrix.version }}-dev + - name: Run the testsuite + shell: bash + run: | + mkdir build && cd build + cmake -DLIT=/usr/lib/llvm-${{ matrix.version }}/build/utils/lit/lit.py \ + -DCLANG_BINARY=/usr/bin/clang-${{ matrix.version }} \ + -DCLANGXX_BINARY=/usr/bin/clang++-${{ matrix.version }} \ + -DCLANG_TIDY_BINARY=/usr/bin/clang-tidy-${{ matrix.version }} \ + -DCLANG_FORMAT_BINARY=/usr/bin/clang-format-${{ matrix.version }} \ + -DCLANG_FORMAT_DIFF_BINARY=/usr/bin/clang-format-diff-${{ matrix.version }} \ + -DLLD_BINARY=/usr/bin/lld-${{ matrix.version }} \ + -DLLDB_BINARY=/usr/bin/lldb-${{ matrix.version }} \ + -DLLVMCONFIG_BINARY=/usr/bin/llvm-config-${{ matrix.version }} \ + -DOPT_BINARY=/usr/bin/opt-${{ matrix.version }} \ + -DSCANBUILD=/usr/bin/scan-build-${{ 
matrix.version }} \
+          -DCLANG_TIDY_BINARY=/usr/bin/clang-tidy-${{ matrix.version }} \
+          -DSCANVIEW=/usr/bin/scan-view-${{ matrix.version }} \
+          -DLLVMNM=/usr/bin/llvm-nm-${{ matrix.version }} \
+          -DLLVMPROFDATA=/usr/bin/llvm-profdata-${{ matrix.version }} \
+          -DENABLE_COMPILER_RT=ON \
+          -DENABLE_LIBCXX=ON \
+          ../
+        make check
diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/tests/format_diff.c llvm-toolchain-12-12.0.1/integration-test-suite/tests/format_diff.c
--- llvm-toolchain-12-12.0.0/integration-test-suite/tests/format_diff.c 1970-01-01 00:00:00.000000000 +0000
+++ llvm-toolchain-12-12.0.1/integration-test-suite/tests/format_diff.c 2021-07-09 07:09:49.000000000 +0000
@@ -0,0 +1,15 @@
+// Make sure clang-format-diff works as expected
+//
+// REQUIRES: clang-format-diff, clang
+// RUN: %clang -E -Wp,-P %s -DV0 > %t.0.c
+// RUN: %clang -E -Wp,-P %s -DV1 > %t.1.c
+// RUN: diff -u %t.0.c %t.1.c || true > %t.patch
+// RUN: %clang-format-diff -i < %t.patch
+
+#if defined(V0)
+int main() {
+}
+#elif defined(V1)
+int main() { return 0;
+}
+#endif
diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/tests/from_chars_libc++.cpp llvm-toolchain-12-12.0.1/integration-test-suite/tests/from_chars_libc++.cpp
--- llvm-toolchain-12-12.0.0/integration-test-suite/tests/from_chars_libc++.cpp 1970-01-01 00:00:00.000000000 +0000
+++ llvm-toolchain-12-12.0.1/integration-test-suite/tests/from_chars_libc++.cpp 2021-07-09 07:09:49.000000000 +0000
@@ -0,0 +1,21 @@
+// Test charconv integration, see https://bugzilla.redhat.com/show_bug.cgi?id=1657544
+// But uses libc++
+// REQUIRES: clang, libc++
+// RUN: %clangxx -stdlib=libc++ %s -o %t
+// RUN: %t 100 | grep 100
+
+#include <charconv>
+#include <cstring>
+#include <iostream>
+
+using namespace std;
+
+int main(int argc, char **argv)
+{
+  size_t r=0;
+  const char *begin = argv[1];
+  const char *end = begin + strlen(begin);
+  from_chars(begin, end, r);
+  cout << r << '\n';
+  return 0;
+}
diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/tests/libclang_cpp.cpp llvm-toolchain-12-12.0.1/integration-test-suite/tests/libclang_cpp.cpp
--- llvm-toolchain-12-12.0.0/integration-test-suite/tests/libclang_cpp.cpp 1970-01-01 00:00:00.000000000 +0000
+++ llvm-toolchain-12-12.0.1/integration-test-suite/tests/libclang_cpp.cpp 2021-07-09 07:09:49.000000000 +0000
@@ -0,0 +1,36 @@
+// Test the link against libclang-cppXX
+//
+// REQUIRES: clang, llvm-config
+// RUN: %clangxx -lclang-cpp -v %s -o %t `%llvm-config --cxxflags --libs`
+// RUN: ldd %t 2>&1|grep -q libclang-cpp
+
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace clang::tooling;
+using clang::tooling::CommonOptionsParser;
+
+// Apply a custom category to all command-line options so that they are the
+// only ones displayed.
+static llvm::cl::OptionCategory MyToolCategory("my-tool options");
+
+int main(int argc, const char **argv) {
+  // CommonOptionsParser constructor will parse arguments and create a
+  // CompilationDatabase. In case of error it will terminate the program.
+
+#if __clang_major__ < 13
+  clang::tooling::CommonOptionsParser OptionsParser(argc, argv,
+                                                    MyToolCategory);
+#else
+  auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory);
+  if (!ExpectedParser) {
+    // Fail gracefully for unsupported options.
+ llvm::errs() << ExpectedParser.takeError(); + return 1; + } + CommonOptionsParser& OptionsParser = ExpectedParser.get(); +#endif + + // Use OptionsParser.getCompilations() and OptionsParser.getSourcePathList() + // to retrieve CompilationDatabase and the list of input file paths. +} diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/tests/libc++_link_static.cpp llvm-toolchain-12-12.0.1/integration-test-suite/tests/libc++_link_static.cpp --- llvm-toolchain-12-12.0.0/integration-test-suite/tests/libc++_link_static.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/integration-test-suite/tests/libc++_link_static.cpp 2021-07-09 07:09:49.000000000 +0000 @@ -0,0 +1,17 @@ +// Test lib(std)c++ static +// https://bugs.llvm.org/show_bug.cgi?id=43604 +// +// Check linking with libstdc++ +// RUN: %clangxx -o %t %s -pie -static-libstdc++ +// RUN: %t +// RUN: ldd %t 2>&1|grep -qv libstdc++ +// +// Check linking with libc++. As of now, this fails: +// FAIL: %clangxx -o %t -fPIC %s -pie -stdlib=libc++ -static-libstdc++ +// +// REQUIRES: clangxx, libc++ + +#include +int main () { + std::cout << "Hello World!" << std::endl; +} diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/tests/lit.site.cfg.in llvm-toolchain-12-12.0.1/integration-test-suite/tests/lit.site.cfg.in --- llvm-toolchain-12-12.0.0/integration-test-suite/tests/lit.site.cfg.in 2021-04-15 05:53:51.000000000 +0000 +++ llvm-toolchain-12-12.0.1/integration-test-suite/tests/lit.site.cfg.in 2021-07-09 07:09:49.000000000 +0000 @@ -18,6 +18,7 @@ # The order matters enable_program('clang-tidy', "@CLANG_TIDY_BINARY@") +enable_program('clang-format-diff', "@CLANG_FORMAT_DIFF_BINARY@") enable_program('clang-format', "@CLANG_FORMAT_BINARY@") enable_program('clangxx', "@CLANGXX_BINARY@") enable_program('clang', "@CLANG_BINARY@") diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/tests/lto_and_lld.c llvm-toolchain-12-12.0.1/integration-test-suite/tests/lto_and_lld.c --- llvm-toolchain-12-12.0.0/integration-test-suite/tests/lto_and_lld.c 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/integration-test-suite/tests/lto_and_lld.c 2021-07-09 07:09:49.000000000 +0000 @@ -0,0 +1,20 @@ +// Test LTO support within lld +// REQUIRES: clang, lld +// RUN: %clang -c -flto %s -DLIB -o %t-obj.o +// RUN: %clang -c -flto %s -ULIB -o %t-main.o +// RUN: %clang -fuse-ld=lld -flto %t-obj.o %t-main.o -o %t +// RUN: %t | grep "hello lita" + +#ifdef LIB +#include +void greet() { + puts("hello lita"); +} +#else +extern void greet(); + +int main() { + greet(); + return 0; +} +#endif diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/tests/rpass_inline.cpp llvm-toolchain-12-12.0.1/integration-test-suite/tests/rpass_inline.cpp --- llvm-toolchain-12-12.0.0/integration-test-suite/tests/rpass_inline.cpp 2021-04-15 05:53:51.000000000 +0000 +++ llvm-toolchain-12-12.0.1/integration-test-suite/tests/rpass_inline.cpp 2021-07-09 07:09:49.000000000 +0000 @@ -1,9 +1,17 @@ // Test a Rpass=inline // // RUN: %clangxx -O2 -Rpass=inline %s -c &> %t.log -// RUN: grep -q "cost=always" %t.log +// RUN: grep -q -E "(inlined into main with|cost=always)" %t.log // REQUIRES: clangxx int foo(int x, int y) __attribute__((always_inline)); int foo(int x, int y) { return x + y; } int bar(int j) { return foo(j, j - 2); } + +int sum = 0; + +int main(int argc, const char *argv[]) { + for (int i = 0; i < 30; i++) + bar(argc); + return sum; +} diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/tests/whole-toolchain.cpp 
llvm-toolchain-12-12.0.1/integration-test-suite/tests/whole-toolchain.cpp --- llvm-toolchain-12-12.0.0/integration-test-suite/tests/whole-toolchain.cpp 2021-04-15 05:53:51.000000000 +0000 +++ llvm-toolchain-12-12.0.1/integration-test-suite/tests/whole-toolchain.cpp 2021-07-09 07:09:49.000000000 +0000 @@ -1,6 +1,9 @@ // Tests all components of the toolchain // REQUIRES: clang, lld, compiler-rt, libc++ -// RUN: %clangxx -fuse-ld=lld -rtlib=compiler-rt -stdlib=libc++ %s -o %t +// +// NOTE: Adding -lgcc_eh here is required to provide unwinding information. An +// alternative would be to force usage of LLVM unwinder when building compiler-rt. +// RUN: %clangxx -fuse-ld=lld -rtlib=compiler-rt -stdlib=libc++ -lgcc_eh %s -o %t // RUN: %t | grep "Hello World" #include @@ -8,4 +11,3 @@ std::cout << "Hello World" << std::endl; return 0; } - diff -Nru llvm-toolchain-12-12.0.0/integration-test-suite/.travis.yml llvm-toolchain-12-12.0.1/integration-test-suite/.travis.yml --- llvm-toolchain-12-12.0.0/integration-test-suite/.travis.yml 2021-04-15 05:53:51.000000000 +0000 +++ llvm-toolchain-12-12.0.1/integration-test-suite/.travis.yml 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -language: minimal -jobs: - include: - - os: linux - dist: bionic - addons: - apt: - sources: - - sourceline: 'deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' - key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' - - ubuntu-toolchain-r-test - packages: - - clang-10 - - clang-tidy-10 - - clang-format-10 - - clang-tools-10 - - llvm-10-dev - - lld-10 - - lldb-10 - - llvm-10-tools - - libomp-10-dev - - libc++-10-dev - - libc++abi-10-dev - - libstdc++-9-dev - - libclang-common-10-dev - -script: | - mkdir build && cd build && \ - cmake -DLIT=/usr/lib/llvm-10/build/utils/lit/lit.py \ - -DCLANG_BINARY=/usr/bin/clang-10 \ - -DCLANGXX_BINARY=/usr/bin/clang++-10 \ - -DCLANG_TIDY_BINARY=/usr/bin/clang-tidy-10 \ - -DCLANG_FORMAT_BINARY=/usr/bin/clang-format-10 \ - -DLLD_BINARY=/usr/bin/lld-10 \ - -DLLDB_BINARY=/usr/bin/lldb-10 \ - -DLLVMCONFIG_BINARY=/usr/bin/llvm-config-10 \ - -DOPT_BINARY=/usr/bin/opt-10 \ - -DSCANBUILD=/usr/bin/scan-build-10 \ - -DCLANG_TIDY_BINARY=/usr/bin/clang-tidy-10 \ - -DSCANVIEW=/usr/bin/scan-view-10 \ - -DLLVMNM=/usr/bin/llvm-nm-10 \ - -DLLVMPROFDATA=/usr/bin/llvm-profdata-10 \ - -DENABLE_COMPILER_RT=OFF \ - -DENABLE_LIBCXX=ON \ - ../ && \ - make check diff -Nru llvm-toolchain-12-12.0.0/libcxx/CMakeLists.txt llvm-toolchain-12-12.0.1/libcxx/CMakeLists.txt --- llvm-toolchain-12-12.0.0/libcxx/CMakeLists.txt 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/CMakeLists.txt 2021-07-09 07:09:47.000000000 +0000 @@ -29,7 +29,7 @@ project(libcxx CXX C) set(PACKAGE_NAME libcxx) - set(PACKAGE_VERSION 12.0.0) + set(PACKAGE_VERSION 12.0.1) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff -Nru llvm-toolchain-12-12.0.0/libcxx/include/__availability llvm-toolchain-12-12.0.1/libcxx/include/__availability --- llvm-toolchain-12-12.0.0/libcxx/include/__availability 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/include/__availability 2021-07-09 07:09:47.000000000 +0000 @@ -43,6 +43,14 @@ // as unavailable. When vendors decide to ship the feature as part of their // shared library, they can update the markup appropriately. // +// Furthermore, many features in the standard library have corresponding +// feature-test macros. 
When a feature is made unavailable on some deployment +// target, a macro should be defined to signal that it is unavailable. That +// macro can then be picked up when feature-test macros are generated (see +// generate_feature_test_macro_components.py) to make sure that feature-test +// macros don't announce a feature as being implemented if it has been marked +// as unavailable. +// // Note that this mechanism is disabled by default in the "upstream" libc++. // Availability annotations are only meaningful when shipping libc++ inside // a platform (i.e. as a system library), and so vendors that want them should @@ -76,6 +84,8 @@ // This controls the availability of std::shared_mutex and std::shared_timed_mutex, // which were added to the dylib later. # define _LIBCPP_AVAILABILITY_SHARED_MUTEX +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex // These macros control the availability of std::bad_optional_access and // other exception types. These were put in the shared library to prevent @@ -114,6 +124,7 @@ # define _LIBCPP_AVAILABILITY_FILESYSTEM # define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH # define _LIBCPP_AVAILABILITY_FILESYSTEM_POP +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem // This controls the availability of std::to_chars. # define _LIBCPP_AVAILABILITY_TO_CHARS @@ -122,6 +133,10 @@ // which requires shared library support for various operations // (see libcxx/src/atomic.cpp). # define _LIBCPP_AVAILABILITY_SYNC +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore #elif defined(__APPLE__) @@ -130,6 +145,14 @@ __attribute__((availability(ios,strict,introduced=10.0))) \ __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex +# endif + # define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ __attribute__((availability(macosx,strict,introduced=10.13))) \ __attribute__((availability(ios,strict,introduced=11.0))) \ @@ -139,27 +162,34 @@ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS + # define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ __attribute__((availability(macosx,strict,introduced=10.12))) \ __attribute__((availability(ios,strict,introduced=10.0))) \ __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) + # define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE \ __attribute__((availability(macosx,strict,introduced=10.12))) \ __attribute__((availability(ios,strict,introduced=10.0))) \ __attribute__((availability(tvos,strict,introduced=10.0))) \ 
__attribute__((availability(watchos,strict,introduced=3.0))) + # define _LIBCPP_AVAILABILITY_FUTURE_ERROR \ __attribute__((availability(ios,strict,introduced=6.0))) + # define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_FILESYSTEM \ __attribute__((availability(macosx,strict,introduced=10.15))) \ __attribute__((availability(ios,strict,introduced=13.0))) \ @@ -175,10 +205,23 @@ _Pragma("clang attribute pop") \ _Pragma("clang attribute pop") \ _Pragma("clang attribute pop") +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem +# endif + # define _LIBCPP_AVAILABILITY_TO_CHARS \ _LIBCPP_AVAILABILITY_FILESYSTEM + + // Note: Those are not ABI-stable yet, so we can't ship them. # define _LIBCPP_AVAILABILITY_SYNC \ __attribute__((unavailable)) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore #else diff -Nru llvm-toolchain-12-12.0.0/libcxx/include/__bit_reference llvm-toolchain-12-12.0.1/libcxx/include/__bit_reference --- llvm-toolchain-12-12.0.0/libcxx/include/__bit_reference 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/include/__bit_reference 2021-07-09 07:09:47.000000000 +0000 @@ -1114,28 +1114,26 @@ #endif {} - // avoid re-declaring a copy constructor for the non-const version. - using __type_for_copy_to_const = - _If<_IsConst, __bit_iterator<_Cp, false>, struct __private_nat>; - + // When _IsConst=false, this is the copy constructor. + // It is non-trivial. Making it trivial would break ABI. + // When _IsConst=true, this is a converting constructor; + // the copy and move constructors are implicitly generated + // and trivial. _LIBCPP_INLINE_VISIBILITY - __bit_iterator(const __type_for_copy_to_const& __it) _NOEXCEPT + __bit_iterator(const __bit_iterator<_Cp, false>& __it) _NOEXCEPT : __seg_(__it.__seg_), __ctz_(__it.__ctz_) {} - // The non-const __bit_iterator has historically had a non-trivial - // copy constructor (as a quirk of its construction). We need to maintain - // this for ABI purposes. - using __type_for_abi_non_trivial_copy_ctor = - _If; - - _LIBCPP_INLINE_VISIBILITY - __bit_iterator(__type_for_abi_non_trivial_copy_ctor const& __it) _NOEXCEPT - : __seg_(__it.__seg_), __ctz_(__it.__ctz_) {} - - // Always declare the copy assignment operator since the implicit declaration - // is deprecated. 
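For context on this __bit_reference change: whether the copy constructor is user-provided or defaulted is observable, because a user-provided copy constructor makes the type non-trivially copyable, and under the Itanium C++ ABI such types are passed by invisible reference rather than in registers. A minimal sketch with hypothetical types (not libc++ code) showing the trait flip:

    #include <type_traits>

    // Writing the copy constructor out by hand, even with plain member-wise
    // behaviour, is enough to make the type non-trivially copyable.
    struct HandWrittenCopy {
      unsigned *seg;
      unsigned ctz;
      HandWrittenCopy(const HandWrittenCopy &other)
          : seg(other.seg), ctz(other.ctz) {} // user-provided
    };

    // Defaulting it keeps the type trivially copyable.
    struct DefaultedCopy {
      unsigned *seg;
      unsigned ctz;
      DefaultedCopy(const DefaultedCopy &) = default;
    };

    static_assert(!std::is_trivially_copyable<HandWrittenCopy>::value, "");
    static_assert(std::is_trivially_copyable<DefaultedCopy>::value, "");

That is the property the hunk is careful to preserve for the non-const iterator, while letting the const iterator keep its implicitly generated, trivial special members.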
+ // When _IsConst=false, we have a user-provided copy constructor, + // so we must also provide a copy assignment operator because + // the implicit generation of a defaulted one is deprecated. + // When _IsConst=true, the assignment operators are + // implicitly generated and trivial. _LIBCPP_INLINE_VISIBILITY - __bit_iterator& operator=(__bit_iterator const&) = default; + __bit_iterator& operator=(const _If<_IsConst, struct __private_nat, __bit_iterator>& __it) { + __seg_ = __it.__seg_; + __ctz_ = __it.__ctz_; + return *this; + } _LIBCPP_INLINE_VISIBILITY reference operator*() const _NOEXCEPT {return reference(__seg_, __storage_type(1) << __ctz_);} diff -Nru llvm-toolchain-12-12.0.0/libcxx/include/fstream llvm-toolchain-12-12.0.1/libcxx/include/fstream --- llvm-toolchain-12-12.0.0/libcxx/include/fstream 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/include/fstream 2021-07-09 07:09:47.000000000 +0000 @@ -244,7 +244,7 @@ return open(__p.c_str(), __mode); } #endif - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY basic_filebuf* __open(int __fd, ios_base::openmode __mode); #endif basic_filebuf* close(); @@ -574,7 +574,7 @@ } template -inline _LIBCPP_INLINE_VISIBILITY +inline basic_filebuf<_CharT, _Traits>* basic_filebuf<_CharT, _Traits>::__open(int __fd, ios_base::openmode __mode) { basic_filebuf<_CharT, _Traits>* __rt = nullptr; @@ -1326,6 +1326,7 @@ } template +inline void basic_ifstream<_CharT, _Traits>::__open(int __fd, ios_base::openmode __mode) { if (__sb_.__open(__fd, __mode | ios_base::in)) @@ -1539,6 +1540,7 @@ } template +inline void basic_ofstream<_CharT, _Traits>::__open(int __fd, ios_base::openmode __mode) { if (__sb_.__open(__fd, __mode | ios_base::out)) diff -Nru llvm-toolchain-12-12.0.0/libcxx/include/memory llvm-toolchain-12-12.0.1/libcxx/include/memory --- llvm-toolchain-12-12.0.0/libcxx/include/memory 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/include/memory 2021-07-09 07:09:47.000000000 +0000 @@ -99,14 +99,14 @@ }; template <> -class allocator // deprecated in C++17, removed in C++20 +class allocator // removed in C++20 { public: - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; + typedef void* pointer; // deprecated in C++17 + typedef const void* const_pointer; // deprecated in C++17 + typedef void value_type; // deprecated in C++17 - template struct rebind {typedef allocator<_Up> other;}; + template struct rebind {typedef allocator<_Up> other;}; // deprecated in C++17 }; template @@ -786,34 +786,59 @@ template class allocator; -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS) +#if _LIBCPP_STD_VER <= 17 template <> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator +class _LIBCPP_TEMPLATE_VIS allocator { public: - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef void* pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef void value_type; - template struct rebind {typedef allocator<_Up> other;}; + template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;}; }; template <> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator +class _LIBCPP_TEMPLATE_VIS allocator { public: - typedef const void* pointer; - typedef const void* const_pointer; - typedef const void value_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer; + 
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type; - template struct rebind {typedef allocator<_Up> other;}; + template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;}; }; #endif +// This class provides a non-trivial default constructor to the class that derives from it +// if the condition is satisfied. +// +// The second template parameter exists to allow giving a unique type to __non_trivial_if, +// which makes it possible to avoid breaking the ABI when making this a base class of an +// existing class. Without that, imagine we have classes D1 and D2, both of which used to +// have no base classes, but which now derive from __non_trivial_if. The layout of a class +// that inherits from both D1 and D2 will change because the two __non_trivial_if base +// classes are not allowed to share the same address. +// +// By making those __non_trivial_if base classes unique, we work around this problem and +// it is safe to start deriving from __non_trivial_if in existing classes. +template +struct __non_trivial_if { }; + +template +struct __non_trivial_if { + _LIBCPP_INLINE_VISIBILITY + _LIBCPP_CONSTEXPR __non_trivial_if() _NOEXCEPT { } +}; + // allocator +// +// Note: For ABI compatibility between C++20 and previous standards, we make +// allocator trivial in C++20. template class _LIBCPP_TEMPLATE_VIS allocator + : private __non_trivial_if::value, allocator<_Tp> > { public: typedef size_t size_type; @@ -823,7 +848,7 @@ typedef true_type is_always_equal; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - allocator() _NOEXCEPT { } + allocator() _NOEXCEPT _LIBCPP_DEFAULT template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 @@ -895,6 +920,7 @@ template class _LIBCPP_TEMPLATE_VIS allocator + : private __non_trivial_if::value, allocator > { public: typedef size_t size_type; @@ -904,7 +930,7 @@ typedef true_type is_always_equal; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - allocator() _NOEXCEPT { } + allocator() _NOEXCEPT _LIBCPP_DEFAULT template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 @@ -2745,7 +2771,6 @@ typename enable_if < !is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type = __nat()); @@ -2754,7 +2779,6 @@ typename enable_if < is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type = __nat()); @@ -2795,7 +2819,6 @@ template typename enable_if < - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, shared_ptr& >::type @@ -3157,7 +3180,6 @@ typename enable_if < !is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type) @@ -3170,7 +3192,7 @@ #endif { typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT > _CntrlBlk; + typedef __shared_ptr_pointer::pointer, _Dp, _AllocT > _CntrlBlk; __cntrl_ = new _CntrlBlk(__r.get(), __r.get_deleter(), _AllocT()); __enable_weak_this(__r.get(), __r.get()); } @@ -3183,7 +3205,6 @@ typename enable_if < is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type) @@ -3196,7 +3217,7 @@ #endif { typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer<_Yp*, + typedef __shared_ptr_pointer::pointer, reference_wrapper::type>, _AllocT > 
_CntrlBlk; __cntrl_ = new _CntrlBlk(__r.get(), _VSTD::ref(__r.get_deleter()), _AllocT()); @@ -3280,7 +3301,6 @@ inline typename enable_if < - !is_array<_Yp>::value && is_convertible::pointer, typename shared_ptr<_Tp>::element_type*>::value, shared_ptr<_Tp>& diff -Nru llvm-toolchain-12-12.0.0/libcxx/include/version llvm-toolchain-12-12.0.1/libcxx/include/version --- llvm-toolchain-12-12.0.0/libcxx/include/version 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/include/version 2021-07-09 07:09:47.000000000 +0000 @@ -184,7 +184,7 @@ # define __cpp_lib_quoted_string_io 201304L # define __cpp_lib_result_of_sfinae 201210L # define __cpp_lib_robust_nonmodifying_seq_ops 201304L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # define __cpp_lib_shared_timed_mutex 201402L # endif # define __cpp_lib_string_udls 201304L @@ -213,7 +213,9 @@ # define __cpp_lib_clamp 201603L # define __cpp_lib_enable_shared_from_this 201603L // # define __cpp_lib_execution 201603L -# define __cpp_lib_filesystem 201703L +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# define __cpp_lib_filesystem 201703L +# endif # define __cpp_lib_gcd_lcm 201606L // # define __cpp_lib_hardware_interference_size 201703L # if defined(_LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS) @@ -241,7 +243,7 @@ # define __cpp_lib_raw_memory_algorithms 201606L # define __cpp_lib_sample 201603L # define __cpp_lib_scoped_lock 201703L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # define __cpp_lib_shared_mutex 201505L # endif # define __cpp_lib_shared_ptr_arrays 201611L @@ -279,10 +281,10 @@ # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_atomic_value_initialization 201911L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # define __cpp_lib_atomic_wait 201907L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # define __cpp_lib_barrier 201907L # endif // # define __cpp_lib_bind_front 201907L @@ -326,7 +328,7 @@ # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_jthread 201911L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # define __cpp_lib_latch 201907L # endif # define __cpp_lib_list_remove_return_type 201806L @@ -336,7 +338,7 @@ // # define __cpp_lib_polymorphic_allocator 201902L // # define __cpp_lib_ranges 201811L # define __cpp_lib_remove_cvref 201711L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # define __cpp_lib_semaphore 201907L # endif # define __cpp_lib_shift 201806L diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.cxx2a.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.cxx2a.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.cxx2a.pass.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.cxx2a.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -33,18 +33,19 @@ 
#include #include -#include "test_macros.h" - -int main(int, char**) -{ - static_assert((std::is_same::size_type, std::size_t>::value), ""); - static_assert((std::is_same::difference_type, std::ptrdiff_t>::value), ""); - static_assert((std::is_same::pointer, char*>::value), ""); - static_assert((std::is_same::const_pointer, const char*>::value), ""); - static_assert((std::is_same::reference, char&>::value), ""); - static_assert((std::is_same::const_reference, const char&>::value), ""); - static_assert((std::is_same::rebind::other, +template +void test() { + static_assert((std::is_same::size_type, std::size_t>::value), ""); + static_assert((std::is_same::difference_type, std::ptrdiff_t>::value), ""); + static_assert((std::is_same::pointer, T*>::value), ""); + static_assert((std::is_same::const_pointer, const T*>::value), ""); + static_assert((std::is_same::reference, T&>::value), ""); + static_assert((std::is_same::const_reference, const T&>::value), ""); + static_assert((std::is_same::template rebind::other, std::allocator >::value), ""); +} +int main(int, char**) { + test(); return 0; } diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.cxx2a.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.cxx2a.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.cxx2a.pass.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.cxx2a.pass.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,45 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// Check that the following member types of allocator are provided -// regardless of the Standard when we request them from libc++. 
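Stepping back to the <version> hunks a little earlier: their net effect is that a feature-test macro is only advertised when the deployment target can actually provide the feature at runtime. Downstream code is expected to key off the macro rather than the language level, roughly like this (a minimal sketch, not taken from this patch):

    #include <version>

    #if defined(__cpp_lib_filesystem)
    #  include <filesystem>
    namespace fs = std::filesystem;
    #else
    // With the new _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem
    // markup, this branch is also taken when the minimum macOS/iOS/tvOS/watchOS
    // deployment target predates the dylib support for <filesystem>.
    #  error "std::filesystem is not available on this target"
    #endif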
- -// template <> -// class allocator -// { -// public: -// typedef void* pointer; -// typedef const void* const_pointer; -// typedef void value_type; -// -// template struct rebind {typedef allocator<_Up> other;}; -// }; - -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS - -#include -#include - -#include "test_macros.h" - -int main(int, char**) -{ - static_assert((std::is_same::pointer, void*>::value), ""); - static_assert((std::is_same::const_pointer, const void*>::value), ""); - static_assert((std::is_same::value_type, void>::value), ""); - static_assert((std::is_same::rebind::other, - std::allocator >::value), ""); - std::allocator a; - std::allocator a2 = a; - a2 = a; - - return 0; -} diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py llvm-toolchain-12-12.0.1/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py --- llvm-toolchain-12-12.0.0/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py 2021-07-09 07:09:47.000000000 +0000 @@ -37,6 +37,23 @@ compare_frame = gdb.newest_frame().older() testcase_frame = compare_frame.older() test_loc = testcase_frame.find_sal() + + expectation_val = compare_frame.read_var("expectation") + check_literal = expectation_val.string(encoding="utf-8") + + # Heuristic to determine if libc++ itself has debug + # info. If it doesn't, then anything normally homed there + # won't be found, and the printer will error. We don't + # want to fail the test in this case--the printer itself + # is probably fine, or at least we can't tell. + if check_literal.startswith("std::shared_ptr"): + shared_ptr = compare_frame.read_var("value") + if not "__shared_owners_" in shared_ptr.type.fields(): + print("IGNORED (no debug info in libc++): " + + test_loc.symtab.filename + ":" + + str(test_loc.line)) + return + # Use interactive commands in the correct context to get the pretty # printed version @@ -45,11 +62,10 @@ # Ignore the convenience variable name and newline value = value_str[value_str.find("= ") + 2:-1] gdb.newest_frame().select() - expectation_val = compare_frame.read_var("expectation") check_literal = expectation_val.string(encoding="utf-8") if "PrettyPrintToRegex" in compare_frame.name(): - test_fails = not re.match(check_literal, value) + test_fails = not re.search(check_literal, value) else: test_fails = value != check_literal diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp llvm-toolchain-12-12.0.1/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -427,9 +427,9 @@ void vector_test() { std::vector test0 = {true, false}; - ComparePrettyPrintToChars(test0, + ComparePrettyPrintToRegex(test0, "std::vector of " - "length 2, capacity 64 = {1, 0}"); + "length 2, capacity (32|64) = {1, 0}"); for (int i = 0; i < 31; ++i) { test0.push_back(true); test0.push_back(false); @@ -444,9 +444,9 @@ ComparePrettyPrintToRegex( test0, "std::vector of length 65, " - "capacity 128 = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, " - "1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, " - "1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 
1, 0, 1, 0, 1}"); + "capacity (96|128) = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, " + "0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, " + "0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}"); std::vector test1; ComparePrettyPrintToChars(test1, "std::vector of length 0, capacity 0"); @@ -489,8 +489,9 @@ auto not_found = one_two_three.find(7); MarkAsLive(not_found); - CompareExpressionPrettyPrintToRegex("not_found", - R"(std::__map_iterator = {\[0x[a-f0-9]+\] = end\(\)})"); + // Because the end_node is not easily detected, just be sure it doesn't crash. + CompareExpressionPrettyPrintToRegex( + "not_found", R"(std::__map_iterator ( = {\[0x[a-f0-9]+\] = .*}|))"); } void unordered_set_test() { @@ -607,25 +608,27 @@ // due to which there is one more count for the pointer. Hence, all the // following tests are testing with expected count plus 1. std::shared_ptr test0 = std::make_shared(5); + // The python regular expression matcher treats newlines as significant, so + // these regular expressions should be on one line. ComparePrettyPrintToRegex( test0, - R"(std::shared_ptr count 2, weak 0 containing = {__ptr_ = 0x[a-f0-9]+})"); + R"(std::shared_ptr count [2\?], weak [0\?]( \(libc\+\+ missing debug info\))? containing = {__ptr_ = 0x[a-f0-9]+})"); std::shared_ptr test1(test0); ComparePrettyPrintToRegex( test1, - R"(std::shared_ptr count 3, weak 0 containing = {__ptr_ = 0x[a-f0-9]+})"); + R"(std::shared_ptr count [3\?], weak [0\?]( \(libc\+\+ missing debug info\))? containing = {__ptr_ = 0x[a-f0-9]+})"); { std::weak_ptr test2 = test1; ComparePrettyPrintToRegex( test0, - R"(std::shared_ptr count 3, weak 1 containing = {__ptr_ = 0x[a-f0-9]+})"); + R"(std::shared_ptr count [3\?], weak [1\?]( \(libc\+\+ missing debug info\))? containing = {__ptr_ = 0x[a-f0-9]+})"); } ComparePrettyPrintToRegex( test0, - R"(std::shared_ptr count 3, weak 0 containing = {__ptr_ = 0x[a-f0-9]+})"); + R"(std::shared_ptr count [3\?], weak [0\?]( \(libc\+\+ missing debug info\))? containing = {__ptr_ = 0x[a-f0-9]+})"); std::shared_ptr test3; ComparePrettyPrintToChars(test3, "std::shared_ptr is nullptr"); diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Make sure that std::allocator is trivial. This was the case before C++20 +// with the std::allocator explicit specialization, and this test makes sure +// that we maintain that property across all standards. +// +// This is important since triviality has implications on how the type is passed +// as a function argument in the ABI. 
+
+#include <memory>
+#include <type_traits>
+
+typedef std::allocator<void> A1;
+typedef std::allocator<void const> A2;
+struct A3 : std::allocator<void> { };
+struct A4 : std::allocator<void const> { };
+
+static_assert(std::is_trivially_default_constructible<A1>::value, "");
+static_assert(std::is_trivial<A1>::value, "");
+
+static_assert(std::is_trivially_default_constructible<A2>::value, "");
+static_assert(std::is_trivial<A2>::value, "");
+
+static_assert(std::is_trivially_default_constructible<A3>::value, "");
+static_assert(std::is_trivial<A3>::value, "");
+
+static_assert(std::is_trivially_default_constructible<A4>::value, "");
+static_assert(std::is_trivial<A4>::value, "");
diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp
--- llvm-toolchain-12-12.0.0/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp 2021-04-15 05:53:49.000000000 +0000
+++ llvm-toolchain-12-12.0.1/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp 2021-07-09 07:09:47.000000000 +0000
@@ -46,7 +46,11 @@
   //
   // With trivial_abi, local_addr is the address of a local variable in
   // make_val, and hence different from &ret.
+#if !defined(__i386__)
+  // On X86, structs are never returned in registers.
+  // Thus, unique_ptr will be passed indirectly even if it is trivial.
   assert((void*)&ret != local_addr);
+#endif
 
   return 0;
 }
diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp
--- llvm-toolchain-12-12.0.0/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp 2021-04-15 05:53:49.000000000 +0000
+++ llvm-toolchain-12-12.0.1/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp 2021-07-09 07:09:47.000000000 +0000
@@ -49,9 +49,10 @@
   //
   // With trivial_abi, local_addr is the address of a local variable in
   // make_val, and hence different from &ret.
-#ifndef __arm__
+#if !defined(__i386__) && !defined(__arm__)
+  // On X86, structs are never returned in registers.
   // On ARM32, structs larger than 4 bytes cannot be returned in registers.
-  // Thus, weak_ptr will be passed indrectly even if it is trivial.
+  // Thus, weak_ptr will be passed indirectly even if it is trivial.
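For context on the assert this comment guards: Clang's [[clang::trivial_abi]] attribute lets a wrapper with a non-trivial destructor still be returned in registers, so the callee's local and the caller's result get distinct addresses; under an sret-style convention (i386, or ARM32 for structs over 4 bytes) the callee constructs straight into the caller's slot, the addresses coincide, and the check is skipped. A minimal sketch, assuming Clang and using an invented Handle type that is not part of this patch:

// Sketch only: [[clang::trivial_abi]] is a Clang extension.
struct [[clang::trivial_abi]] Handle {
  int* ptr;
  explicit Handle(int* p) : ptr(p) {}
  ~Handle() {}  // user-provided destructor, yet still eligible for register passing
};

// On targets whose ABI returns small aggregates in registers, the temporary
// built inside make() and the caller's result can live at different addresses;
// with a hidden-return-pointer convention they are the same object.
Handle make(int* p) { return Handle(p); }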
assert((void*)&ret != local_addr); #endif return 0; diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -35,7 +35,7 @@ struct promise_type { std::shared_ptr data; - std::shared_ptr get_return_object() { data = std::make_shared(); return data; } + expected get_return_object() { data = std::make_shared(); return {data}; } suspend_never initial_suspend() { return {}; } suspend_never final_suspend() noexcept { return {}; } void return_value(T v) { data->val = v; data->error = {}; } diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/input.output/filesystems/fs.req.macros/feature_macro.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/input.output/filesystems/fs.req.macros/feature_macro.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/input.output/filesystems/fs.req.macros/feature_macro.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/input.output/filesystems/fs.req.macros/feature_macro.pass.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// #define __cpp_lib_filesystem 201703L - -#include -#include "test_macros.h" - -#if TEST_STD_VER >= 17 -#ifndef __cpp_lib_filesystem -#error Filesystem feature test macro is not defined (__cpp_lib_filesystem) -#elif __cpp_lib_filesystem != 201703L -#error Filesystem feature test macro has an incorrect value (__cpp_lib_filesystem) -#endif -#else // TEST_STD_VER < 17 -#ifdef __cpp_lib_filesystem -#error Filesystem feature test macro should not be defined before C++17 -#endif -#endif - -int main(int, char**) { - return 0; -} diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -248,7 +248,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # ifndef __cpp_lib_atomic_wait # error "__cpp_lib_atomic_wait should be defined in c++20" # endif @@ -257,7 +257,7 @@ # endif # else # ifdef __cpp_lib_atomic_wait -# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" 
+# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) is not defined!" # endif # endif @@ -367,7 +367,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # ifndef __cpp_lib_atomic_wait # error "__cpp_lib_atomic_wait should be defined in c++2b" # endif @@ -376,7 +376,7 @@ # endif # else # ifdef __cpp_lib_atomic_wait -# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) is not defined!" # endif # endif diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -44,7 +44,7 @@ #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # ifndef __cpp_lib_barrier # error "__cpp_lib_barrier should be defined in c++20" # endif @@ -53,13 +53,13 @@ # endif # else # ifdef __cpp_lib_barrier -# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) is not defined!" # endif # endif #elif TEST_STD_VER > 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # ifndef __cpp_lib_barrier # error "__cpp_lib_barrier should be defined in c++2b" # endif @@ -68,7 +68,7 @@ # endif # else # ifdef __cpp_lib_barrier -# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) is not defined!" 
# endif # endif diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -51,11 +51,17 @@ # error "__cpp_lib_char8_t should not be defined before c++20" # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++17" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++17" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++17" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++17" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" +# endif # endif #elif TEST_STD_VER == 20 @@ -73,11 +79,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++20" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++20" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++20" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++20" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" +# endif # endif #elif TEST_STD_VER > 20 @@ -95,11 +107,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++2b" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++2b" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++2b" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++2b" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" 
+# endif # endif #endif // TEST_STD_VER > 20 diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -44,7 +44,7 @@ #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # ifndef __cpp_lib_latch # error "__cpp_lib_latch should be defined in c++20" # endif @@ -53,13 +53,13 @@ # endif # else # ifdef __cpp_lib_latch -# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) is not defined!" # endif # endif #elif TEST_STD_VER > 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # ifndef __cpp_lib_latch # error "__cpp_lib_latch should be defined in c++2b" # endif @@ -68,7 +68,7 @@ # endif # else # ifdef __cpp_lib_latch -# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) is not defined!" # endif # endif diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -44,7 +44,7 @@ #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # ifndef __cpp_lib_semaphore # error "__cpp_lib_semaphore should be defined in c++20" # endif @@ -53,13 +53,13 @@ # endif # else # ifdef __cpp_lib_semaphore -# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) is not defined!" # endif # endif #elif TEST_STD_VER > 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # ifndef __cpp_lib_semaphore # error "__cpp_lib_semaphore should be defined in c++2b" # endif @@ -68,7 +68,7 @@ # endif # else # ifdef __cpp_lib_semaphore -# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" 
+# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) is not defined!" # endif # endif diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -41,7 +41,7 @@ # error "__cpp_lib_shared_mutex should not be defined before c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++14" # endif @@ -50,13 +50,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif #elif TEST_STD_VER == 17 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++17" # endif @@ -65,11 +65,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++17" # endif @@ -78,13 +78,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++20" # endif @@ -93,11 +93,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" 
# endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++20" # endif @@ -106,13 +106,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif #elif TEST_STD_VER > 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++2b" # endif @@ -121,11 +121,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++2b" # endif @@ -134,7 +134,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -1133,7 +1133,7 @@ # error "__cpp_lib_shared_ptr_weak_type should not be defined before c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++14" # endif @@ -1142,7 +1142,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" 
# endif # endif @@ -1534,11 +1534,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++17" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++17" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++17" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++17" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" +# endif # endif # ifndef __cpp_lib_gcd_lcm @@ -1883,7 +1889,7 @@ # error "__cpp_lib_semaphore should not be defined before c++20" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++17" # endif @@ -1892,7 +1898,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif @@ -1910,7 +1916,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++17" # endif @@ -1919,7 +1925,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif @@ -2223,7 +2229,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # ifndef __cpp_lib_atomic_wait # error "__cpp_lib_atomic_wait should be defined in c++20" # endif @@ -2232,11 +2238,11 @@ # endif # else # ifdef __cpp_lib_atomic_wait -# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # ifndef __cpp_lib_barrier # error "__cpp_lib_barrier should be defined in c++20" # endif @@ -2245,7 +2251,7 @@ # endif # else # ifdef __cpp_lib_barrier -# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) is not defined!" 
# endif # endif @@ -2575,11 +2581,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++20" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++20" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++20" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++20" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" +# endif # endif # ifndef __cpp_lib_gcd_lcm @@ -2795,7 +2807,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # ifndef __cpp_lib_latch # error "__cpp_lib_latch should be defined in c++20" # endif @@ -2804,7 +2816,7 @@ # endif # else # ifdef __cpp_lib_latch -# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) is not defined!" # endif # endif @@ -3019,7 +3031,7 @@ # error "__cpp_lib_scoped_lock should have the value 201703L in c++20" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # ifndef __cpp_lib_semaphore # error "__cpp_lib_semaphore should be defined in c++20" # endif @@ -3028,11 +3040,11 @@ # endif # else # ifdef __cpp_lib_semaphore -# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++20" # endif @@ -3041,7 +3053,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif @@ -3059,7 +3071,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++20" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++20" # endif @@ -3068,7 +3080,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" 
# endif # endif @@ -3429,7 +3441,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # ifndef __cpp_lib_atomic_wait # error "__cpp_lib_atomic_wait should be defined in c++2b" # endif @@ -3438,11 +3450,11 @@ # endif # else # ifdef __cpp_lib_atomic_wait -# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # ifndef __cpp_lib_barrier # error "__cpp_lib_barrier should be defined in c++2b" # endif @@ -3451,7 +3463,7 @@ # endif # else # ifdef __cpp_lib_barrier -# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) is not defined!" # endif # endif @@ -3781,11 +3793,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++2b" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++2b" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++2b" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++2b" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" +# endif # endif # ifndef __cpp_lib_gcd_lcm @@ -4004,7 +4022,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # ifndef __cpp_lib_latch # error "__cpp_lib_latch should be defined in c++2b" # endif @@ -4013,7 +4031,7 @@ # endif # else # ifdef __cpp_lib_latch -# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) is not defined!" # endif # endif @@ -4228,7 +4246,7 @@ # error "__cpp_lib_scoped_lock should have the value 201703L in c++2b" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # ifndef __cpp_lib_semaphore # error "__cpp_lib_semaphore should be defined in c++2b" # endif @@ -4237,11 +4255,11 @@ # endif # else # ifdef __cpp_lib_semaphore -# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) is not defined!" 
# endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++2b" # endif @@ -4250,7 +4268,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif @@ -4268,7 +4286,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++2b" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++2b" # endif @@ -4277,7 +4295,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator.ctor.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator.ctor.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator.ctor.pass.cpp 2020-10-16 21:13:08.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator.ctor.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -7,15 +7,14 @@ //===----------------------------------------------------------------------===// // -// UNSUPPORTED: c++03, c++11, c++14, c++17 // // template // class allocator // { // public: // All of these are constexpr after C++17 -// constexpr allocator() noexcept; -// constexpr allocator(const allocator&) noexcept; -// template constexpr allocator(const allocator&) noexcept; +// allocator() noexcept; +// allocator(const allocator&) noexcept; +// template allocator(const allocator&) noexcept; // ... 
// }; @@ -24,28 +23,27 @@ #include "test_macros.h" +template +TEST_CONSTEXPR_CXX20 bool test() { + typedef std::allocator A1; + typedef std::allocator A2; + + A1 a1; + A1 a1_copy = a1; (void)a1_copy; + A2 a2 = a1; (void)a2; -int main(int, char**) -{ - { - typedef std::allocator AC; - typedef std::allocator AL; - - constexpr AC a1; - constexpr AC a2{a1}; - constexpr AL a3{a2}; - (void) a3; - } - { - typedef std::allocator AC; - typedef std::allocator AL; - - constexpr AC a1; - constexpr AC a2{a1}; - constexpr AL a3{a2}; - (void) a3; - } - + return true; +} +int main(int, char**) { + test(); + test(); + test(); + +#if TEST_STD_VER > 17 + static_assert(test()); + static_assert(test()); + static_assert(test()); +#endif return 0; } diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator.dtor.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator.dtor.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator.dtor.pass.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator.dtor.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -13,7 +13,6 @@ #include - template constexpr bool test() { std::allocator alloc; @@ -26,11 +25,13 @@ int main(int, char**) { test(); + test(); #ifdef _LIBCPP_VERSION // extension test(); #endif // _LIBCPP_VERSION static_assert(test()); + static_assert(test()); #ifdef _LIBCPP_VERSION // extension static_assert(test()); #endif // _LIBCPP_VERSION diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp 2020-10-16 21:13:08.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -11,9 +11,9 @@ #include #include -// #include - #include "test_macros.h" + +// // // template // struct allocator_traits diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_types.deprecated_in_cxx17.verify.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_types.deprecated_in_cxx17.verify.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_types.deprecated_in_cxx17.verify.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_types.deprecated_in_cxx17.verify.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -30,20 +30,27 @@ // UNSUPPORTED: clang-6 #include -#include "test_macros.h" -int main(int, char**) -{ - typedef std::allocator::pointer AP; // expected-warning {{'pointer' is deprecated}} - typedef std::allocator::const_pointer ACP; // expected-warning {{'const_pointer' is deprecated}} - typedef std::allocator::reference AR; // expected-warning {{'reference' is deprecated}} - typedef std::allocator::const_reference ACR; // expected-warning {{'const_reference' is deprecated}} - typedef std::allocator::rebind::other ARO; // expected-warning {{'rebind' is deprecated}} - - typedef std::allocator::pointer AP2; // expected-warning {{'pointer' is deprecated}} - typedef std::allocator::const_pointer ACP2; // expected-warning {{'const_pointer' is deprecated}} - 
typedef std::allocator::reference AR2; // expected-warning {{'reference' is deprecated}} - typedef std::allocator::const_reference ACR2; // expected-warning {{'const_reference' is deprecated}} - typedef std::allocator::rebind::other ARO2; // expected-warning {{'rebind' is deprecated}} +int main(int, char**) { + { + typedef std::allocator::pointer Pointer; // expected-warning {{'pointer' is deprecated}} + typedef std::allocator::const_pointer ConstPointer; // expected-warning {{'const_pointer' is deprecated}} + typedef std::allocator::reference Reference; // expected-warning {{'reference' is deprecated}} + typedef std::allocator::const_reference ConstReference; // expected-warning {{'const_reference' is deprecated}} + typedef std::allocator::rebind::other Rebind; // expected-warning {{'rebind' is deprecated}} + } + { + typedef std::allocator::pointer Pointer; // expected-warning {{'pointer' is deprecated}} + typedef std::allocator::const_pointer ConstPointer; // expected-warning {{'const_pointer' is deprecated}} + typedef std::allocator::reference Reference; // expected-warning {{'reference' is deprecated}} + typedef std::allocator::const_reference ConstReference; // expected-warning {{'const_reference' is deprecated}} + typedef std::allocator::rebind::other Rebind; // expected-warning {{'rebind' is deprecated}} + } + { + typedef std::allocator::pointer Pointer; // expected-warning {{'pointer' is deprecated}} + typedef std::allocator::const_pointer ConstPointer; // expected-warning {{'const_pointer' is deprecated}} + // reference and const_reference are not provided by std::allocator + typedef std::allocator::rebind::other Rebind; // expected-warning {{'rebind' is deprecated}} + } return 0; } diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_types.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_types.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_types.pass.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_types.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -18,47 +18,49 @@ // typedef ptrdiff_t difference_type; // typedef T value_type; // +// typedef T* pointer; // deprecated in C++17, removed in C++20 +// typedef T const* const_pointer; // deprecated in C++17, removed in C++20 +// typedef T& reference; // deprecated in C++17, removed in C++20 +// typedef T const& const_reference; // deprecated in C++17, removed in C++20 +// template< class U > struct rebind { typedef allocator other; }; // deprecated in C++17, removed in C++20 +// // typedef true_type propagate_on_container_move_assignment; // typedef true_type is_always_equal; // ... 
// }; +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + #include #include #include #include "test_macros.h" -template -TEST_CONSTEXPR_CXX20 bool test() -{ - static_assert((std::is_same::size_type, std::size_t>::value), ""); - static_assert((std::is_same::difference_type, std::ptrdiff_t>::value), ""); - static_assert((std::is_same::value_type, T>::value), ""); - static_assert((std::is_same::propagate_on_container_move_assignment, std::true_type>::value), ""); - static_assert((std::is_same::is_always_equal, std::true_type>::value), ""); - - std::allocator a; - std::allocator a2 = a; - a2 = a; - std::allocator a3 = a2; - (void)a3; +struct U; - return true; +template +void test() { + typedef std::allocator Alloc; + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + +#if TEST_STD_VER <= 17 + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::other, std::allocator >::value), ""); +#endif } -int main(int, char**) -{ - test(); -#ifdef _LIBCPP_VERSION // extension - test(); -#endif // _LIBCPP_VERSION - -#if TEST_STD_VER > 17 - static_assert(test()); -#ifdef _LIBCPP_VERSION // extension - static_assert(test()); -#endif // _LIBCPP_VERSION +int main(int, char**) { + test(); +#ifdef _LIBCPP_VERSION + test(); // extension #endif return 0; } diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_types.removed_in_cxx20.verify.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_types.removed_in_cxx20.verify.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_types.removed_in_cxx20.verify.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_types.removed_in_cxx20.verify.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -31,16 +31,17 @@ template void check() { - typedef typename std::allocator::pointer AP; // expected-error 2 {{no type named 'pointer'}} - typedef typename std::allocator::const_pointer ACP; // expected-error 2 {{no type named 'const_pointer'}} - typedef typename std::allocator::reference AR; // expected-error 2 {{no type named 'reference'}} - typedef typename std::allocator::const_reference ACR; // expected-error 2 {{no type named 'const_reference'}} - typedef typename std::allocator::template rebind::other ARO; // expected-error 2 {{no member named 'rebind'}} + typedef typename std::allocator::pointer AP; // expected-error 3 {{no type named 'pointer'}} + typedef typename std::allocator::const_pointer ACP; // expected-error 3 {{no type named 'const_pointer'}} + typedef typename std::allocator::reference AR; // expected-error 3 {{no type named 'reference'}} + typedef typename std::allocator::const_reference ACR; // expected-error 3 {{no type named 'const_reference'}} + typedef typename std::allocator::template rebind::other ARO; // expected-error 3 {{no member named 'rebind'}} } int main(int, char**) { check(); check(); + check(); return 0; } diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_types.void.compile.pass.cpp 
llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_types.void.compile.pass.cpp
--- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_types.void.compile.pass.cpp 1970-01-01 00:00:00.000000000 +0000
+++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_types.void.compile.pass.cpp 2021-07-09 07:04:59.000000000 +0000
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Check that the nested types of std::allocator<void> are provided.
+// After C++17, those are not provided in the primary template and the
+// explicit specialization doesn't exist anymore, so this test is moot.
+
+// REQUIRES: c++03 || c++11 || c++14 || c++17
+
+// template <>
+// class allocator<void>
+// {
+// public:
+//     typedef void* pointer;
+//     typedef const void* const_pointer;
+//     typedef void value_type;
+//
+//     template <class _Up> struct rebind {typedef allocator<_Up> other;};
+// };
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
+#include <memory>
+#include <type_traits>
+
+static_assert((std::is_same<std::allocator<void>::pointer, void*>::value), "");
+static_assert((std::is_same<std::allocator<void>::const_pointer, const void*>::value), "");
+static_assert((std::is_same<std::allocator<void>::value_type, void>::value), "");
+static_assert((std::is_same<std::allocator<void>::rebind<int>::other,
+                            std::allocator<int> >::value), "");
diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_void.deprecated_in_cxx17.verify.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_void.deprecated_in_cxx17.verify.cpp
--- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/allocator_void.deprecated_in_cxx17.verify.cpp 2021-02-17 08:14:29.000000000 +0000
+++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/allocator_void.deprecated_in_cxx17.verify.cpp 1970-01-01 00:00:00.000000000 +0000
@@ -1,24 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <memory>
-
-// Check that allocator<void> is deprecated in C++17.
- -// REQUIRES: c++17 - -#include -#include "test_macros.h" - -int main(int, char**) -{ - typedef std::allocator::pointer AP; // expected-warning {{'allocator' is deprecated}} - typedef std::allocator::const_pointer ACP; // expected-warning {{'allocator' is deprecated}} - typedef std::allocator::rebind::other ARO; // expected-warning {{'allocator' is deprecated}} - return 0; -} diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/PR50299.compile.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/PR50299.compile.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/default.allocator/PR50299.compile.pass.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/default.allocator/PR50299.compile.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Make sure we can use std::allocator in all Standard modes. While the +// explicit specialization for std::allocator was deprecated, using that +// specialization was neither deprecated nor removed (in C++20 it should simply +// start using the primary template). +// +// See https://llvm.org/PR50299. + +#include + +std::allocator a; diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -41,6 +41,19 @@ int A::count = 0; +template +struct StatefulArrayDeleter { + int state = 0; + + StatefulArrayDeleter(int val = 0) : state(val) {} + StatefulArrayDeleter(StatefulArrayDeleter const&) { assert(false); } + + void operator()(T* ptr) { + assert(state == 42); + delete []ptr; + } +}; + int main(int, char**) { { @@ -112,5 +125,82 @@ assert(B::count == 0); assert(A::count == 0); + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p; + p = std::move(ptr); + assert(A::count == 8); + assert(B::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p; + p = std::move(ptr); + assert(A::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + + { + std::unique_ptr ptr(new int[8]); + std::shared_ptr p; + p = std::move(ptr); + } + +#if TEST_STD_VER > 14 + { + StatefulArrayDeleter d; + std::unique_ptr&> u(new A[4], d); + std::shared_ptr p; + p = std::move(u); + d.state = 42; + assert(A::count == 4); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr 
ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p; + p = std::move(ptr); + assert(A::count == 8); + assert(B::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p; + p = std::move(ptr); + assert(A::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + + { + std::unique_ptr ptr(new int[8]); + std::shared_ptr p; + p = std::move(ptr); + } +#endif // TEST_STD_VER >= 14 + return 0; } diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp --- llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp 2021-07-09 07:04:59.000000000 +0000 @@ -10,7 +10,7 @@ // -// template explicit shared_ptr(unique_ptr&&r); +// template shared_ptr(unique_ptr&&r); #include #include @@ -69,6 +69,19 @@ } }; +template +struct StatefulArrayDeleter { + int state = 0; + + StatefulArrayDeleter(int val = 0) : state(val) {} + StatefulArrayDeleter(StatefulArrayDeleter const&) { assert(false); } + + void operator()(T* ptr) { + assert(state == 42); + delete []ptr; + } +}; + int main(int, char**) { { @@ -135,5 +148,76 @@ std::shared_ptr s = std::move(u); } - return 0; + assert(A::count == 0); + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p(std::move(ptr)); + assert(A::count == 8); + assert(B::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p(std::move(ptr)); + assert(A::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + + { + std::unique_ptr ptr(new int[8]); + std::shared_ptr p(std::move(ptr)); + } + +#if TEST_STD_VER > 14 + { + StatefulArrayDeleter d; + std::unique_ptr&> u(new A[4], d); + std::shared_ptr p(std::move(u)); + d.state = 42; + assert(A::count == 4); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p(std::move(ptr)); + assert(A::count == 8); + assert(B::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p(std::move(ptr)); + assert(A::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + + { + std::unique_ptr ptr(new int[8]); + std::shared_ptr p(std::move(ptr)); + } +#endif // TEST_STD_VER >= 14 + + return 0; } diff -Nru llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp --- 
llvm-toolchain-12-12.0.0/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp 2020-10-16 21:13:08.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -13,7 +13,7 @@ // GCC's implementation of class template deduction is still immature and runs // into issues with libc++. However GCC accepts this code when compiling // against libstdc++. -// XFAIL: gcc-5, gcc-6, gcc-7 +// XFAIL: gcc-5, gcc-6, gcc-7, gcc-8, gcc-9, gcc-10, gcc-11 // diff -Nru llvm-toolchain-12-12.0.0/libcxx/utils/ci/run-buildbot llvm-toolchain-12-12.0.1/libcxx/utils/ci/run-buildbot --- llvm-toolchain-12-12.0.0/libcxx/utils/ci/run-buildbot 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/utils/ci/run-buildbot 2021-07-09 07:09:47.000000000 +0000 @@ -168,8 +168,8 @@ check-cxx-cxxabi ;; generic-gcc) - export CC=gcc - export CXX=g++ + export CC=gcc-10 + export CXX=g++-10 clean # FIXME: Re-enable experimental testing on GCC. GCC cares about the order # in which we link -lc++experimental, which causes issues. diff -Nru llvm-toolchain-12-12.0.0/libcxx/utils/gdb/libcxx/printers.py llvm-toolchain-12-12.0.1/libcxx/utils/gdb/libcxx/printers.py --- llvm-toolchain-12-12.0.0/libcxx/utils/gdb/libcxx/printers.py 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/utils/gdb/libcxx/printers.py 2021-07-09 07:09:47.000000000 +0000 @@ -13,6 +13,7 @@ from __future__ import print_function +import math import re import gdb @@ -141,7 +142,7 @@ def __next__(self): # child_iter raises StopIteration when appropriate. - field_name = self.child_iter.next() + field_name = next(self.child_iter) child = self.val["__base_"][field_name]["__value_"] self.count += 1 return ("[%d]" % self.count, child) @@ -311,12 +312,21 @@ return "%s is nullptr" % typename refcount = self.val["__cntrl_"] if refcount != 0: - usecount = refcount["__shared_owners_"] + 1 - weakcount = refcount["__shared_weak_owners_"] - if usecount == 0: - state = "expired, weak %d" % weakcount - else: - state = "count %d, weak %d" % (usecount, weakcount) + try: + usecount = refcount["__shared_owners_"] + 1 + weakcount = refcount["__shared_weak_owners_"] + if usecount == 0: + state = "expired, weak %d" % weakcount + else: + state = "count %d, weak %d" % (usecount, weakcount) + except: + # Debug info for a class with virtual functions is emitted + # in the same place as its key function. That means that + # for std::shared_ptr, __shared_owners_ is emitted into + # into libcxx.[so|a] itself, rather than into the shared_ptr + # instantiation point. So if libcxx.so was built without + # debug info, these fields will be missing. + state = "count ?, weak ? 
(libc++ missing debug info)" return "%s<%s> %s containing" % (typename, pointee_type, state) def __iter__(self): @@ -425,6 +435,7 @@ self.val = val self.n_words = int(self.val["__n_words"]) self.bits_per_word = int(self.val["__bits_per_word"]) + self.bit_count = self.val.type.template_argument(0) if self.n_words == 1: self.values = [int(self.val["__first_"])] else: @@ -435,21 +446,12 @@ typename = _prettify_typename(self.val.type) return "%s" % typename - def _byte_it(self, value): - index = -1 - while value: - index += 1 - will_yield = value % 2 - value /= 2 - if will_yield: - yield index - def _list_it(self): - for word_index in range(self.n_words): - current = self.values[word_index] - if current: - for n in self._byte_it(current): - yield ("[%d]" % (word_index * self.bits_per_word + n), 1) + for bit in range(self.bit_count): + word = math.floor(bit / self.bits_per_word) + word_bit = bit % self.bits_per_word + if self.values[word] & (1 << word_bit): + yield ("[%d]" % bit, 1) def __iter__(self): return self._list_it() diff -Nru llvm-toolchain-12-12.0.0/libcxx/utils/generate_feature_test_macro_components.py llvm-toolchain-12-12.0.1/libcxx/utils/generate_feature_test_macro_components.py --- llvm-toolchain-12-12.0.0/libcxx/utils/generate_feature_test_macro_components.py 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxx/utils/generate_feature_test_macro_components.py 2021-07-09 07:09:47.000000000 +0000 @@ -111,14 +111,14 @@ "name": "__cpp_lib_atomic_wait", "values": { "c++20": 201907 }, "headers": ["atomic"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait)", }, { "name": "__cpp_lib_barrier", "values": { "c++20": 201907 }, "headers": ["barrier"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier)", }, { "name": "__cpp_lib_bind_front", "values": { "c++20": 201907 }, @@ -270,6 +270,8 @@ "name": "__cpp_lib_filesystem", "values": { "c++17": 201703 }, "headers": ["filesystem"], + "depends": "!defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem)", + "internal_depends": "!defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem)" }, { "name": "__cpp_lib_gcd_lcm", "values": { "c++17": 201606 }, @@ -383,8 +385,8 @@ "name": "__cpp_lib_latch", "values": { "c++20": 201907 }, "headers": ["latch"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch)", }, { "name": "__cpp_lib_launder", "values": { "c++17": 201606 }, @@ -496,14 +498,14 @@ "name": "__cpp_lib_semaphore", "values": { "c++20": 201907 }, "headers": ["semaphore"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && 
!defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore)", }, { "name": "__cpp_lib_shared_mutex", "values": { "c++17": 201505 }, "headers": ["shared_mutex"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex)", }, { "name": "__cpp_lib_shared_ptr_arrays", "values": { "c++17": 201611 }, @@ -516,8 +518,8 @@ "name": "__cpp_lib_shared_timed_mutex", "values": { "c++14": 201402 }, "headers": ["shared_mutex"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex)", }, { "name": "__cpp_lib_shift", "values": { "c++20": 201806 }, diff -Nru llvm-toolchain-12-12.0.0/libcxxabi/CMakeLists.txt llvm-toolchain-12-12.0.1/libcxxabi/CMakeLists.txt --- llvm-toolchain-12-12.0.0/libcxxabi/CMakeLists.txt 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libcxxabi/CMakeLists.txt 2021-07-09 07:09:47.000000000 +0000 @@ -28,7 +28,7 @@ project(libcxxabi CXX C) set(PACKAGE_NAME libcxxabi) - set(PACKAGE_VERSION 11.0.0) + set(PACKAGE_VERSION 12.0.1) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff -Nru llvm-toolchain-12-12.0.0/libunwind/CMakeLists.txt llvm-toolchain-12-12.0.1/libunwind/CMakeLists.txt --- llvm-toolchain-12-12.0.0/libunwind/CMakeLists.txt 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/libunwind/CMakeLists.txt 2021-07-09 07:09:47.000000000 +0000 @@ -24,7 +24,7 @@ project(libunwind LANGUAGES C CXX ASM) set(PACKAGE_NAME libunwind) - set(PACKAGE_VERSION 12.0.0) + set(PACKAGE_VERSION 12.0.1) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff -Nru llvm-toolchain-12-12.0.0/lld/ELF/SyntheticSections.cpp llvm-toolchain-12-12.0.1/lld/ELF/SyntheticSections.cpp --- llvm-toolchain-12-12.0.0/lld/ELF/SyntheticSections.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/lld/ELF/SyntheticSections.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -3110,7 +3110,9 @@ void VersionTableSection::writeTo(uint8_t *buf) { buf += 2; for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) { - write16(buf, s.sym->versionId); + // Use the original versionId for an unfetched lazy symbol (undefined weak), + // which must be VER_NDX_GLOBAL (an undefined versioned symbol is an error). + write16(buf, s.sym->isLazy() ? VER_NDX_GLOBAL : s.sym->versionId); buf += 2; } } diff -Nru llvm-toolchain-12-12.0.0/lld/test/ELF/version-script-weak.s llvm-toolchain-12-12.0.1/lld/test/ELF/version-script-weak.s --- llvm-toolchain-12-12.0.0/lld/test/ELF/version-script-weak.s 2020-10-16 21:13:08.000000000 +0000 +++ llvm-toolchain-12-12.0.1/lld/test/ELF/version-script-weak.s 2021-07-09 07:04:59.000000000 +0000 @@ -24,6 +24,19 @@ # CHECK-NEXT: Section: Undefined # CHECK-NEXT: } +## The version of an unfetched lazy symbol is VER_NDX_GLOBAL. 
It is not affected by version scripts. +# RUN: echo "v1 { *; };" > %t2.script +# RUN: ld.lld -shared --version-script %t2.script %t.o --start-lib %t1.o --end-lib -o %t2.so +# RUN: llvm-readelf --dyn-syms %t2.so | FileCheck %s --check-prefix=CHECK2 + +# CHECK2: NOTYPE WEAK DEFAULT UND foo{{$}} + +# RUN: ld.lld -shared --soname=tshared --version-script %t2.script %t1.o -o %tshared.so +# RUN: ld.lld -shared --version-script %t2.script %t.o --start-lib %t1.o --end-lib %tshared.so -o %t3.so +# RUN: llvm-readelf --dyn-syms %t3.so | FileCheck %s --check-prefix=CHECK3 + +# CHECK3: NOTYPE WEAK DEFAULT UND foo@v1 + .text callq foo@PLT .weak foo diff -Nru llvm-toolchain-12-12.0.0/lld/test/lit.cfg.py llvm-toolchain-12-12.0.1/lld/test/lit.cfg.py --- llvm-toolchain-12-12.0.0/lld/test/lit.cfg.py 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/lld/test/lit.cfg.py 2021-07-09 07:09:47.000000000 +0000 @@ -101,11 +101,13 @@ tar_executable = lit.util.which('tar', config.environment['PATH']) if tar_executable: + env = os.environ + env['LANG'] = 'C' tar_version = subprocess.Popen( [tar_executable, '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env={'LANG': 'C'}) + env=env) sout, _ = tar_version.communicate() if 'GNU tar' in sout.decode(): config.available_features.add('gnutar') diff -Nru llvm-toolchain-12-12.0.0/lldb/docs/man/lldb.rst llvm-toolchain-12-12.0.1/lldb/docs/man/lldb.rst --- llvm-toolchain-12-12.0.0/lldb/docs/man/lldb.rst 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/lldb/docs/man/lldb.rst 2021-07-09 07:04:59.000000000 +0000 @@ -256,11 +256,11 @@ passed to the debugged executable, arguments starting with a - must be passed after --. - lldb --arch x86_64 /path/to/program program argument -- --arch arvm7 + lldb --arch x86_64 /path/to/program program argument -- --arch armv7 For convenience, passing the executable after -- is also supported. - lldb --arch x86_64 -- /path/to/program program argument --arch arvm7 + lldb --arch x86_64 -- /path/to/program program argument --arch armv7 Passing one of the attach options causes :program:`lldb` to immediately attach to the given process. diff -Nru llvm-toolchain-12-12.0.0/lldb/tools/driver/Driver.cpp llvm-toolchain-12-12.0.1/lldb/tools/driver/Driver.cpp --- llvm-toolchain-12-12.0.0/lldb/tools/driver/Driver.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/lldb/tools/driver/Driver.cpp 2021-07-09 07:09:47.000000000 +0000 @@ -751,11 +751,11 @@ arguments passed to the debugged executable, arguments starting with a - must be passed after --. - lldb --arch x86_64 /path/to/program program argument -- --arch arvm7 + lldb --arch x86_64 /path/to/program program argument -- --arch armv7 For convenience, passing the executable after -- is also supported. - lldb --arch x86_64 -- /path/to/program program argument --arch arvm7 + lldb --arch x86_64 -- /path/to/program program argument --arch armv7 Passing one of the attach options causes lldb to immediately attach to the given process. 
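The SyntheticSections.cpp and version-script-weak.s hunks above revolve around one rule: an unfetched lazy symbol behaves like an undefined weak symbol, so its dynamic symbol table entry must carry VER_NDX_GLOBAL even when a version script pattern matches its name. The sketch below condenses the guarded write for illustration only; SymbolTableEntry, write16, isLazy and versionId are the names used in the hunk, while DynSymbols and Buf are placeholders.

    // Condensed illustration, not the verbatim lld code.
    for (const SymbolTableEntry &S : DynSymbols) {
      // An unfetched lazy symbol is effectively an undefined weak symbol, and an
      // undefined versioned symbol would be an error, so force VER_NDX_GLOBAL
      // instead of whatever versionId a version script may have assigned.
      write16(Buf, S.sym->isLazy() ? VER_NDX_GLOBAL : S.sym->versionId);
      Buf += 2;
    }
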
diff -Nru llvm-toolchain-12-12.0.0/llvm/CMakeLists.txt llvm-toolchain-12-12.0.1/llvm/CMakeLists.txt --- llvm-toolchain-12-12.0.0/llvm/CMakeLists.txt 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/CMakeLists.txt 2021-07-09 07:09:47.000000000 +0000 @@ -11,7 +11,7 @@ set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 0) + set(LLVM_VERSION_PATCH 1) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX "") diff -Nru llvm-toolchain-12-12.0.0/llvm/include/llvm/ADT/Any.h llvm-toolchain-12-12.0.1/llvm/include/llvm/ADT/Any.h --- llvm-toolchain-12-12.0.0/llvm/include/llvm/ADT/Any.h 2020-10-16 21:13:08.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/include/llvm/ADT/Any.h 2021-07-09 07:09:48.000000000 +0000 @@ -23,7 +23,12 @@ namespace llvm { -class Any { +class LLVM_EXTERNAL_VISIBILITY Any { + + // The `Typeid::Id` static data member below is a globally unique + // identifier for the type `T`. It is explicitly marked with default + // visibility so that when `-fvisibility=hidden` is used, the loader still + // merges duplicate definitions across DSO boundaries. template struct TypeId { static const char Id; }; struct StorageBase { diff -Nru llvm-toolchain-12-12.0.0/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h llvm-toolchain-12-12.0.1/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h --- llvm-toolchain-12-12.0.0/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h 2021-07-09 07:09:48.000000000 +0000 @@ -316,6 +316,7 @@ LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty); + LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty); diff -Nru llvm-toolchain-12-12.0.0/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h llvm-toolchain-12-12.0.1/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h --- llvm-toolchain-12-12.0.0/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h 2021-07-09 07:09:48.000000000 +0000 @@ -111,6 +111,16 @@ return AttributeSpecs[idx].Attr; } + bool getAttrIsImplicitConstByIndex(uint32_t idx) const { + assert(idx < AttributeSpecs.size()); + return AttributeSpecs[idx].isImplicitConst(); + } + + int64_t getAttrImplicitConstValueByIndex(uint32_t idx) const { + assert(idx < AttributeSpecs.size()); + return AttributeSpecs[idx].getImplicitConstValue(); + } + /// Get the index of the specified attribute. /// /// Searches the this abbreviation declaration for the index of the specified diff -Nru llvm-toolchain-12-12.0.0/llvm/include/llvm/IR/Constant.h llvm-toolchain-12-12.0.1/llvm/include/llvm/IR/Constant.h --- llvm-toolchain-12-12.0.0/llvm/include/llvm/IR/Constant.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/include/llvm/IR/Constant.h 2021-07-09 07:09:48.000000000 +0000 @@ -214,6 +214,10 @@ /// both must either be scalars or vectors with the same element count. If no /// changes are made, the constant C is returned. 
static Constant *mergeUndefsWith(Constant *C, Constant *Other); + + /// Return true if a constant is ConstantData or a ConstantAggregate or + /// ConstantExpr that contain only ConstantData. + bool isManifestConstant() const; }; } // end namespace llvm diff -Nru llvm-toolchain-12-12.0.0/llvm/include/llvm/Support/Host.h llvm-toolchain-12-12.0.1/llvm/include/llvm/Support/Host.h --- llvm-toolchain-12-12.0.0/llvm/include/llvm/Support/Host.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/include/llvm/Support/Host.h 2021-07-09 07:05:00.000000000 +0000 @@ -65,6 +65,20 @@ StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForBPF(); + + /// Helper functions to extract CPU details from CPUID on x86. + namespace x86 { + enum class VendorSignatures { + UNKNOWN, + GENUINE_INTEL, + AUTHENTIC_AMD, + }; + + /// Returns the host CPU's vendor. + /// MaxLeaf: if a non-nullptr pointer is specified, the EAX value will be + /// assigned to its pointee. + VendorSignatures getVendorSignature(unsigned *MaxLeaf = nullptr); + } // namespace x86 } } } diff -Nru llvm-toolchain-12-12.0.0/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h llvm-toolchain-12-12.0.1/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- llvm-toolchain-12-12.0.0/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h 2021-07-09 07:09:48.000000000 +0000 @@ -396,22 +396,17 @@ bool canVectorizeOuterLoop(); /// Return true if all of the instructions in the block can be speculatively - /// executed, and record the loads/stores that require masking. If's that - /// guard loads can be ignored under "assume safety" unless \p PreserveGuards - /// is true. This can happen when we introduces guards for which the original - /// "unguarded-loads are safe" assumption does not hold. For example, the - /// vectorizer's fold-tail transformation changes the loop to execute beyond - /// its original trip-count, under a proper guard, which should be preserved. + /// executed, and record the loads/stores that require masking. /// \p SafePtrs is a list of addresses that are known to be legal and we know /// that we can read from them without segfault. /// \p MaskedOp is a list of instructions that have to be transformed into /// calls to the appropriate masked intrinsic when the loop is vectorized. /// \p ConditionalAssumes is a list of assume instructions in predicated /// blocks that must be dropped if the CFG gets flattened. - bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs, - SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes, - bool PreserveGuards = false) const; + bool blockCanBePredicated( + BasicBlock *BB, SmallPtrSetImpl &SafePtrs, + SmallPtrSetImpl &MaskedOp, + SmallPtrSetImpl &ConditionalAssumes) const; /// Updates the vectorization state by adding \p Phi to the inductions list. 
/// This can set \p Phi as the main induction of the loop if \p Phi is a diff -Nru llvm-toolchain-12-12.0.0/llvm/include/llvm-c/Core.h llvm-toolchain-12-12.0.1/llvm/include/llvm-c/Core.h --- llvm-toolchain-12-12.0.0/llvm/include/llvm-c/Core.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/include/llvm-c/Core.h 2021-07-09 07:09:48.000000000 +0000 @@ -605,6 +605,17 @@ uint64_t LLVMGetEnumAttributeValue(LLVMAttributeRef A); /** + * Create a type attribute + */ +LLVMAttributeRef LLVMCreateTypeAttribute(LLVMContextRef C, unsigned KindID, + LLVMTypeRef type_ref); + +/** + * Get the type attribute's value. + */ +LLVMTypeRef LLVMGetTypeAttributeValue(LLVMAttributeRef A); + +/** * Create a string attribute. */ LLVMAttributeRef LLVMCreateStringAttribute(LLVMContextRef C, @@ -626,6 +637,7 @@ */ LLVMBool LLVMIsEnumAttribute(LLVMAttributeRef A); LLVMBool LLVMIsStringAttribute(LLVMAttributeRef A); +LLVMBool LLVMIsTypeAttribute(LLVMAttributeRef A); /** * Obtain a Type from a context by its registered name. diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Analysis/ConstantFolding.cpp llvm-toolchain-12-12.0.1/llvm/lib/Analysis/ConstantFolding.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Analysis/ConstantFolding.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Analysis/ConstantFolding.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1808,19 +1808,6 @@ return APF.convertToDouble(); } -static bool isManifestConstant(const Constant *c) { - if (isa(c)) { - return true; - } else if (isa(c) || isa(c)) { - for (const Value *subc : c->operand_values()) { - if (!isManifestConstant(cast(subc))) - return false; - } - return true; - } - return false; -} - static bool getConstIntOrUndef(Value *Op, const APInt *&C) { if (auto *CI = dyn_cast(Op)) { C = &CI->getValue(); @@ -1845,7 +1832,7 @@ // We know we have a "Constant" argument. But we want to only // return true for manifest constants, not those that depend on // constants with unknowable values, e.g. GlobalValue or BlockAddress. - if (isManifestConstant(Operands[0])) + if (Operands[0]->isManifestConstant()) return ConstantInt::getTrue(Ty->getContext()); return nullptr; } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Analysis/InstructionSimplify.cpp llvm-toolchain-12-12.0.1/llvm/lib/Analysis/InstructionSimplify.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Analysis/InstructionSimplify.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Analysis/InstructionSimplify.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -4127,10 +4127,12 @@ TrueVal, FalseVal)) return V; - // If we have an equality comparison, then we know the value in one of the - // arms of the select. See if substituting this value into the arm and + // If we have a scalar equality comparison, then we know the value in one of + // the arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. - if (Pred == ICmpInst::ICMP_EQ) { + // Note that the equivalence/replacement opportunity does not hold for vectors + // because each element of a vector select is chosen independently. 
+ if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) { if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, /* AllowRefinement */ false, MaxRecurse) == TrueVal || diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp llvm-toolchain-12-12.0.1/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -344,7 +344,9 @@ // If we hit load/store with the same invariant.group metadata (and the // same pointer operand) we can assume that value pointed by pointer // operand didn't change. - if ((isa(U) || isa(U)) && + if ((isa(U) || + (isa(U) && + cast(U)->getPointerOperand() == Ptr)) && U->hasMetadata(LLVMContext::MD_invariant_group)) ClosestDependency = GetClosestDependency(ClosestDependency, U); } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Analysis/ScalarEvolution.cpp llvm-toolchain-12-12.0.1/llvm/lib/Analysis/ScalarEvolution.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Analysis/ScalarEvolution.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Analysis/ScalarEvolution.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -10622,6 +10622,10 @@ if (!dominates(RHS, IncBB)) return false; const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + // Make sure L does not refer to a value from a potentially previous + // iteration of a loop. + if (!properlyDominates(L, IncBB)) + return false; if (!ProvedEasily(L, RHS)) return false; } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Analysis/ValueTracking.cpp llvm-toolchain-12-12.0.1/llvm/lib/Analysis/ValueTracking.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Analysis/ValueTracking.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Analysis/ValueTracking.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -5150,6 +5150,9 @@ return false; } + // Limit number of instructions we look at, to avoid scanning through large + // blocks. The current limit is chosen arbitrarily. + unsigned ScanLimit = 32; BasicBlock::const_iterator End = BB->end(); if (!PoisonOnly) { @@ -5160,6 +5163,11 @@ // For example, 'udiv x, (undef | 1)' isn't UB. for (auto &I : make_range(Begin, End)) { + if (isa(I)) + continue; + if (--ScanLimit == 0) + break; + if (const auto *CB = dyn_cast(&I)) { for (unsigned i = 0; i < CB->arg_size(); ++i) { if (CB->paramHasAttr(i, Attribute::NoUndef) && @@ -5186,9 +5194,12 @@ for_each(V->users(), Propagate); Visited.insert(BB); - unsigned Iter = 0; - while (Iter++ < MaxAnalysisRecursionDepth) { + while (true) { for (auto &I : make_range(Begin, End)) { + if (isa(I)) + continue; + if (--ScanLimit == 0) + return false; if (mustTriggerUB(&I, YieldsPoison)) return true; if (!isGuaranteedToTransferExecutionToSuccessor(&I)) diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -840,9 +840,8 @@ // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, // just re-use the existing condition vreg. 
- if (CI && CI->getZExtValue() == 1 && - MRI->getType(CondLHS).getSizeInBits() == 1 && - CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && + CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { Cond = CondLHS; } else { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1257,22 +1257,9 @@ Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_FPTOUI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); - Observer.changedInstr(MI); - return Legalized; - } - case TargetOpcode::G_FPTOSI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI: + return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); case TargetOpcode::G_FPEXT: if (TypeIdx != 0) return UnableToLegalize; @@ -4496,6 +4483,31 @@ return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; + + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // If all finite floats fit into the narrowed integer type, we can just swap + // out the result type. This is practically only useful for conversions from + // half to at least 16-bits, so just handle the one case. + if (SrcTy.getScalarType() != LLT::scalar(16) || + NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, + IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -7105,14 +7105,22 @@ if (LegalOperations) return SDValue(); - // Collect all the stores in the chain. - SDValue Chain; - SmallVector Stores; - for (StoreSDNode *Store = N; Store; Store = dyn_cast(Chain)) { - // TODO: Allow unordered atomics when wider type is legal (see D66309) - EVT MemVT = Store->getMemoryVT(); - if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || - !Store->isSimple() || Store->isIndexed()) + // We only handle merging simple stores of 1-4 bytes. 
+ // TODO: Allow unordered atomics when wider type is legal (see D66309) + EVT MemVT = N->getMemoryVT(); + if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || + !N->isSimple() || N->isIndexed()) + return SDValue(); + + // Collect all of the stores in the chain. + SDValue Chain = N->getChain(); + SmallVector Stores = {N}; + while (auto *Store = dyn_cast(Chain)) { + // All stores must be the same size to ensure that we are writing all of the + // bytes in the wide value. + // TODO: We could allow multiple sizes by tracking each stored byte. + if (Store->getMemoryVT() != MemVT || !Store->isSimple() || + Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1691,9 +1691,9 @@ /// terminator, but additionally the copies that move the vregs into the /// physical registers. static MachineBasicBlock::iterator -FindSplitPointForStackProtector(MachineBasicBlock *BB) { +FindSplitPointForStackProtector(MachineBasicBlock *BB, + const TargetInstrInfo &TII) { MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); - // if (SplitPoint == BB->begin()) return SplitPoint; @@ -1701,6 +1701,31 @@ MachineBasicBlock::iterator Previous = SplitPoint; --Previous; + if (TII.isTailCall(*SplitPoint) && + Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { + // call itself, then we must insert before the sequence even starts. For + // example: + // + // ADJCALLSTACKDOWN ... + // + // ADJCALLSTACKUP ... + // TAILJMP somewhere + // On the other hand, it could be an unrelated call in which case this tail call + // has to register moves of its own and should be the split point. For example: + // ADJCALLSTACKDOWN + // CALL something_else + // ADJCALLSTACKUP + // + // TAILJMP somewhere + do { + --Previous; + if (Previous->isCall()) + return SplitPoint; + } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode()); + + return Previous; + } + while (MIIsInTerminatorSequence(*Previous)) { SplitPoint = Previous; if (Previous == Start) @@ -1740,7 +1765,7 @@ // Add load and check to the basicblock. FuncInfo->MBB = ParentMBB; FuncInfo->InsertPt = - FindSplitPointForStackProtector(ParentMBB); + FindSplitPointForStackProtector(ParentMBB, *TII); SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); @@ -1759,7 +1784,7 @@ // register allocation issues caused by us splitting the parent mbb. The // register allocator will clean up said virtual copies later on. MachineBasicBlock::iterator SplitPoint = - FindSplitPointForStackProtector(ParentMBB); + FindSplitPointForStackProtector(ParentMBB, *TII); // Splice the terminator of ParentMBB into SuccessMBB. SuccessMBB->splice(SuccessMBB->end(), ParentMBB, diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/StackProtector.cpp llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/StackProtector.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/CodeGen/StackProtector.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/CodeGen/StackProtector.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -470,21 +470,36 @@ // instrumentation has already been generated. 
HasIRCheck = true; + // If we're instrumenting a block with a musttail call, the check has to be + // inserted before the call rather than between it and the return. The + // verifier guarantees that a musttail call is either directly before the + // return or with a single correct bitcast of the return value in between so + // we don't need to worry about many situations here. + Instruction *CheckLoc = RI; + Instruction *Prev = RI->getPrevNonDebugInstruction(); + if (Prev && isa(Prev) && cast(Prev)->isMustTailCall()) + CheckLoc = Prev; + else if (Prev) { + Prev = Prev->getPrevNonDebugInstruction(); + if (Prev && isa(Prev) && cast(Prev)->isMustTailCall()) + CheckLoc = Prev; + } + // Generate epilogue instrumentation. The epilogue intrumentation can be // function-based or inlined depending on which mechanism the target is // providing. if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) { // Generate the function-based epilogue instrumentation. // The target provides a guard check function, generate a call to it. - IRBuilder<> B(RI); + IRBuilder<> B(CheckLoc); LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard"); CallInst *Call = B.CreateCall(GuardCheck, {Guard}); Call->setAttributes(GuardCheck->getAttributes()); Call->setCallingConv(GuardCheck->getCallingConv()); } else { // Generate the epilogue with inline instrumentation. - // If we do not support SelectionDAG based tail calls, generate IR level - // tail calls. + // If we do not support SelectionDAG based calls, generate IR level + // calls. // // For each block with a return instruction, convert this: // @@ -514,7 +529,8 @@ BasicBlock *FailBB = CreateFailBB(); // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); + BasicBlock *NewBB = + BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return"); // Update the dominator tree if we need to. if (DT && DT->isReachableFromEntry(BB)) { diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp llvm-toolchain-12-12.0.1/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -69,7 +69,7 @@ } } -static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, +static void dumpLocation(raw_ostream &OS, const DWARFFormValue &FormValue, DWARFUnit *U, unsigned Indent, DIDumpOptions DumpOpts) { DWARFContext &Ctx = U->getContext(); @@ -230,21 +230,22 @@ } static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, - uint64_t *OffsetPtr, dwarf::Attribute Attr, - dwarf::Form Form, unsigned Indent, + const DWARFAttribute &AttrValue, unsigned Indent, DIDumpOptions DumpOpts) { if (!Die.isValid()) return; const char BaseIndent[] = " "; OS << BaseIndent; OS.indent(Indent + 2); + dwarf::Attribute Attr = AttrValue.Attr; WithColor(OS, HighlightColor::Attribute) << formatv("{0}", Attr); + dwarf::Form Form = AttrValue.Value.getForm(); if (DumpOpts.Verbose || DumpOpts.ShowForm) OS << formatv(" [{0}]", Form); DWARFUnit *U = Die.getDwarfUnit(); - DWARFFormValue FormValue = DWARFFormValue::createFromUnit(Form, U, OffsetPtr); + const DWARFFormValue &FormValue = AttrValue.Value; OS << "\t("; @@ -631,16 +632,8 @@ OS << '\n'; // Dump all data in the DIE for the attributes. 
- for (const auto &AttrSpec : AbbrevDecl->attributes()) { - if (AttrSpec.Form == DW_FORM_implicit_const) { - // We are dumping .debug_info section , - // implicit_const attribute values are not really stored here, - // but in .debug_abbrev section. So we just skip such attrs. - continue; - } - dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form, - Indent, DumpOpts); - } + for (const DWARFAttribute &AttrValue : attributes()) + dumpAttribute(OS, *this, AttrValue, Indent, DumpOpts); DWARFDie child = getFirstChild(); if (DumpOpts.ShowChildren && DumpOpts.ChildRecurseDepth > 0 && child) { @@ -723,10 +716,16 @@ // Add the previous byte size of any previous attribute value. AttrValue.Offset += AttrValue.ByteSize; uint64_t ParseOffset = AttrValue.Offset; - auto U = Die.getDwarfUnit(); - assert(U && "Die must have valid DWARF unit"); - AttrValue.Value = DWARFFormValue::createFromUnit( - AbbrDecl.getFormByIndex(Index), U, &ParseOffset); + if (AbbrDecl.getAttrIsImplicitConstByIndex(Index)) + AttrValue.Value = DWARFFormValue::createFromSValue( + AbbrDecl.getFormByIndex(Index), + AbbrDecl.getAttrImplicitConstValueByIndex(Index)); + else { + auto U = Die.getDwarfUnit(); + assert(U && "Die must have valid DWARF unit"); + AttrValue.Value = DWARFFormValue::createFromUnit( + AbbrDecl.getFormByIndex(Index), U, &ParseOffset); + } AttrValue.ByteSize = ParseOffset - AttrValue.Offset; } else { assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only"); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp llvm-toolchain-12-12.0.1/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -168,6 +168,7 @@ case DW_FORM_line_strp: case DW_FORM_GNU_ref_alt: case DW_FORM_GNU_strp_alt: + case DW_FORM_implicit_const: if (Optional FixedSize = dwarf::getFixedFormByteSize(Form, Params)) { *OffsetPtr += *FixedSize; @@ -345,6 +346,9 @@ case DW_FORM_ref_sig8: Value.uval = Data.getU64(OffsetPtr, &Err); break; + case DW_FORM_implicit_const: + // Value has been already set by DWARFFormValue::createFromSValue. + break; default: // DWARFFormValue::skipValue() will have caught this and caused all // DWARF DIEs to fail to be parsed, so this code is not be reachable. 
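The DWARFDie.cpp hunk above stops special-casing DW_FORM_implicit_const during dumping and instead materializes its value while building the attribute list: the constant lives in .debug_abbrev, so no bytes are consumed from .debug_info for it. Roughly, the dispatch added to the attribute iterator looks like the sketch below; the helper name resolveAttrValue is invented for illustration, while the DWARFAbbreviationDeclaration and DWARFFormValue calls are the ones introduced by this patch.

    static DWARFFormValue resolveAttrValue(const DWARFAbbreviationDeclaration &Abbr,
                                           uint32_t Index, DWARFUnit *U,
                                           uint64_t *OffsetPtr) {
      // DW_FORM_implicit_const keeps its value in the abbreviation declaration,
      // so nothing is read from the DIE's bytes in .debug_info.
      if (Abbr.getAttrIsImplicitConstByIndex(Index))
        return DWARFFormValue::createFromSValue(
            Abbr.getFormByIndex(Index),
            Abbr.getAttrImplicitConstValueByIndex(Index));
      // Every other form is decoded from the unit's data, as before.
      return DWARFFormValue::createFromUnit(Abbr.getFormByIndex(Index), U, OffsetPtr);
    }
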
@@ -482,6 +486,7 @@ break; case DW_FORM_sdata: + case DW_FORM_implicit_const: OS << Value.sval; break; case DW_FORM_udata: diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/IR/Constants.cpp llvm-toolchain-12-12.0.1/llvm/lib/IR/Constants.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/IR/Constants.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/IR/Constants.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -803,6 +803,18 @@ return C; } +bool Constant::isManifestConstant() const { + if (isa(this)) + return true; + if (isa(this) || isa(this)) { + for (const Value *Op : operand_values()) + if (!cast(Op)->isManifestConstant()) + return false; + return true; + } + return false; +} + //===----------------------------------------------------------------------===// // ConstantInt //===----------------------------------------------------------------------===// diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/IR/Core.cpp llvm-toolchain-12-12.0.1/llvm/lib/IR/Core.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/IR/Core.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/IR/Core.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -164,6 +164,18 @@ return Attr.getValueAsInt(); } +LLVMAttributeRef LLVMCreateTypeAttribute(LLVMContextRef C, unsigned KindID, + LLVMTypeRef type_ref) { + auto &Ctx = *unwrap(C); + auto AttrKind = (Attribute::AttrKind)KindID; + return wrap(Attribute::get(Ctx, AttrKind, unwrap(type_ref))); +} + +LLVMTypeRef LLVMGetTypeAttributeValue(LLVMAttributeRef A) { + auto Attr = unwrap(A); + return wrap(Attr.getValueAsType()); +} + LLVMAttributeRef LLVMCreateStringAttribute(LLVMContextRef C, const char *K, unsigned KLength, const char *V, unsigned VLength) { @@ -194,6 +206,10 @@ return unwrap(A).isStringAttribute(); } +LLVMBool LLVMIsTypeAttribute(LLVMAttributeRef A) { + return unwrap(A).isTypeAttribute(); +} + char *LLVMGetDiagInfoDescription(LLVMDiagnosticInfoRef DI) { std::string MsgStorage; raw_string_ostream Stream(MsgStorage); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/MC/ELFObjectWriter.cpp llvm-toolchain-12-12.0.1/llvm/lib/MC/ELFObjectWriter.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/MC/ELFObjectWriter.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/MC/ELFObjectWriter.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1397,6 +1397,17 @@ if (TargetObjectWriter->getEMachine() == ELF::EM_386 && Type == ELF::R_386_GOTOFF) return true; + + // ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so + // it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an + // R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in + // range of a MergeInputSection. We could introduce a new RelExpr member + // (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12) + // but the complexity is unnecessary given that GNU as keeps the original + // symbol for this case as well. + if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS && + !hasRelocationAddend()) + return true; } // Most TLS relocations use a got, so they need the symbol. 
Even those that diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Support/CommandLine.cpp llvm-toolchain-12-12.0.1/llvm/lib/Support/CommandLine.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Support/CommandLine.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Support/CommandLine.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1729,7 +1729,7 @@ void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent, size_t FirstLineIndentedBy) { const StringRef ValHelpPrefix = " "; - assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size()); + assert(BaseIndent >= FirstLineIndentedBy); std::pair Split = HelpStr.split('\n'); outs().indent(BaseIndent - FirstLineIndentedBy) << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n"; diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Support/Host.cpp llvm-toolchain-12-12.0.1/llvm/lib/Support/Host.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Support/Host.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Support/Host.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -417,11 +417,6 @@ #if defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64__) || defined(_M_X64) -enum VendorSignatures { - SIG_INTEL = 0x756e6547 /* Genu */, - SIG_AMD = 0x68747541 /* Auth */ -}; - // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID // support. Consequently, for i386, the presence of CPUID is checked first @@ -495,6 +490,42 @@ #endif } +namespace llvm { +namespace sys { +namespace detail { +namespace x86 { + +VendorSignatures getVendorSignature(unsigned *MaxLeaf) { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + if (MaxLeaf == nullptr) + MaxLeaf = &EAX; + else + *MaxLeaf = 0; + + if (!isCpuIdSupported()) + return VendorSignatures::UNKNOWN; + + if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1) + return VendorSignatures::UNKNOWN; + + // "Genu ineI ntel" + if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) + return VendorSignatures::GENUINE_INTEL; + + // "Auth enti cAMD" + if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) + return VendorSignatures::AUTHENTIC_AMD; + + return VendorSignatures::UNKNOWN; +} + +} // namespace x86 +} // namespace detail +} // namespace sys +} // namespace llvm + +using namespace llvm::sys::detail::x86; + /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return /// the 4 values in the specified arguments. If we can't run cpuid on the host, /// return true. 
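The Host.h and Host.cpp hunks above expose the CPUID vendor check as llvm::sys::detail::x86::getVendorSignature() instead of the old ad-hoc SIG_INTEL/SIG_AMD constants. A minimal caller might look like the sketch below; the function name isAMDHost is hypothetical, while the header path, namespace, and enumerators are the ones added in this patch.

    #include "llvm/Support/Host.h"

    static bool isAMDHost() {
      using namespace llvm::sys::detail::x86;
      unsigned MaxLeaf = 0; // filled in with the maximum supported CPUID leaf
      // UNKNOWN is returned when CPUID is unavailable or the vendor string is
      // neither "GenuineIntel" nor "AuthenticAMD".
      return getVendorSignature(&MaxLeaf) == VendorSignatures::AUTHENTIC_AMD;
    }
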
@@ -1092,14 +1123,12 @@ } StringRef sys::getHostCPUName() { - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - unsigned MaxLeaf, Vendor; - - if (!isCpuIdSupported()) + unsigned MaxLeaf = 0; + const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); + if (Vendor == VendorSignatures::UNKNOWN) return "generic"; - if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) - return "generic"; + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); unsigned Family = 0, Model = 0; @@ -1114,10 +1143,10 @@ StringRef CPU; - if (Vendor == SIG_INTEL) { + if (Vendor == VendorSignatures::GENUINE_INTEL) { CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); - } else if (Vendor == SIG_AMD) { + } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); } @@ -1219,6 +1248,19 @@ } #else StringRef sys::getHostCPUName() { return "generic"; } +namespace llvm { +namespace sys { +namespace detail { +namespace x86 { + +VendorSignatures getVendorSignature(unsigned *MaxLeaf) { + return VendorSignatures::UNKNOWN; +} + +} // namespace x86 +} // namespace detail +} // namespace sys +} // namespace llvm #endif #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -16335,25 +16335,36 @@ unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size > 128) return AtomicExpansionKind::None; - // Nand not supported in LSE. - if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC; - // Leave 128 bits to LLSC. - if (Subtarget->hasLSE() && Size < 128) - return AtomicExpansionKind::None; - if (Subtarget->outlineAtomics() && Size < 128) { - // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. - // Don't outline them unless - // (1) high level support approved: - // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf - // (2) low level libgcc and compiler-rt support implemented by: - // min/max outline atomics helpers - if (AI->getOperation() != AtomicRMWInst::Min && - AI->getOperation() != AtomicRMWInst::Max && - AI->getOperation() != AtomicRMWInst::UMin && - AI->getOperation() != AtomicRMWInst::UMax) { + + // Nand is not supported in LSE. + // Leave 128 bits to LLSC or CmpXChg. + if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { + if (Subtarget->hasLSE()) return AtomicExpansionKind::None; + if (Subtarget->outlineAtomics()) { + // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. + // Don't outline them unless + // (1) high level support approved: + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf + // (2) low level libgcc and compiler-rt support implemented by: + // min/max outline atomics helpers + if (AI->getOperation() != AtomicRMWInst::Min && + AI->getOperation() != AtomicRMWInst::Max && + AI->getOperation() != AtomicRMWInst::UMin && + AI->getOperation() != AtomicRMWInst::UMax) { + return AtomicExpansionKind::None; + } } } + + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. 
If the target address is also on the + // stack and close enough to the spill slot, this can lead to a situation + // where the monitor always gets cleared and the atomic operation can never + // succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1791,7 +1791,7 @@ NegOpc = AArch64::NEGv8i16; } else if (Ty == LLT::vector(16, 8)) { Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; - NegOpc = AArch64::NEGv8i16; + NegOpc = AArch64::NEGv16i8; } else if (Ty == LLT::vector(8, 8)) { Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; NegOpc = AArch64::NEGv8i8; diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -5934,6 +5934,9 @@ || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register || AddrMode == ARMII::AddrModeT2_pc // PCrel access || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST + || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE + || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE + || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR || AddrMode == ARMII::AddrModeNone) return false; @@ -5976,6 +5979,10 @@ NumBits = 8; break; case ARMII::AddrModeT2_i8s4: + // FIXME: Values are already scaled in this addressing mode. + assert((Fixup & 3) == 0 && "Can't encode this offset!"); + NumBits = 10; + break; case ARMII::AddrModeT2_ldrex: NumBits = 8; Scale = 4; @@ -5984,17 +5991,6 @@ case ARMII::AddrMode_i12: NumBits = 12; break; - case ARMII::AddrModeT2_i7: - NumBits = 7; - break; - case ARMII::AddrModeT2_i7s2: - NumBits = 7; - Scale = 2; - break; - case ARMII::AddrModeT2_i7s4: - NumBits = 7; - Scale = 4; - break; case ARMII::AddrModeT1_s: // SP-relative LD/ST NumBits = 8; Scale = 4; @@ -6004,8 +6000,8 @@ } // Make sure the offset is encodable for instructions that scale the // immediate. 
- if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0) - return false; + assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 && + "Can't encode this offset!"); OffVal += Fixup / Scale; unsigned Mask = (1 << NumBits) - 1; diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/ARMBlockPlacement.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/ARMBlockPlacement.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/ARMBlockPlacement.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/ARMBlockPlacement.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -145,8 +145,7 @@ It++) { MachineBasicBlock *MBB = &*It; for (auto &Terminator : MBB->terminators()) { - if (Terminator.getOpcode() != ARM::t2LoopEnd && - Terminator.getOpcode() != ARM::t2LoopEndDec) + if (Terminator.getOpcode() != ARM::t2LoopEndDec) continue; MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB(); // The LE will become forwards branching if it branches to LoopExit @@ -204,10 +203,8 @@ if (!Terminator.isUnconditionalBranch()) { // The BB doesn't have an unconditional branch so it relied on // fall-through. Fix by adding an unconditional branch to the moved BB. - unsigned BrOpc = - BBUtils->isBBInRange(&Terminator, To, 254) ? ARM::tB : ARM::t2B; MachineInstrBuilder MIB = - BuildMI(From, Terminator.getDebugLoc(), TII->get(BrOpc)); + BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B)); MIB.addMBB(To); MIB.addImm(ARMCC::CondCodes::AL); MIB.addReg(ARM::NoRegister); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1467,14 +1467,15 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { auto RemovePredicate = [](MachineInstr *MI) { + if (MI->isDebugInstr()) + return; LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI); - if (int PIdx = llvm::findFirstVPTPredOperandIdx(*MI)) { - assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && - "Expected Then predicate!"); - MI->getOperand(PIdx).setImm(ARMVCC::None); - MI->getOperand(PIdx+1).setReg(0); - } else - llvm_unreachable("trying to unpredicate a non-predicated instruction"); + int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); + assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction"); + assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && + "Expected Then predicate!"); + MI->getOperand(PIdx).setImm(ARMVCC::None); + MI->getOperand(PIdx + 1).setReg(0); }; for (auto &Block : LoLoop.getVPTBlocks()) { @@ -1518,8 +1519,13 @@ // - Insert a new vpst to predicate the instruction(s) that following // the divergent vpr def. 
MachineInstr *Divergent = VPTState::getDivergent(Block); + MachineBasicBlock *MBB = Divergent->getParent(); auto DivergentNext = ++MachineBasicBlock::iterator(Divergent); + while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr()) + ++DivergentNext; + bool DivergentNextIsPredicated = + DivergentNext != MBB->end() && getVPTInstrPredicate(*DivergentNext) != ARMVCC::None; for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext; diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -960,7 +960,8 @@ // Get the value that is added to/multiplied with the phi Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp); - if (IncrementPerRound->getType() != OffsSecondOperand->getType()) + if (IncrementPerRound->getType() != OffsSecondOperand->getType() || + !L->isLoopInvariant(OffsSecondOperand)) // Something has gone wrong, abort return false; @@ -1165,6 +1166,8 @@ bool Changed = false; for (BasicBlock &BB : F) { + Changed |= SimplifyInstructionsInBlock(&BB); + for (Instruction &I : BB) { IntrinsicInst *II = dyn_cast(&I); if (II && II->getIntrinsicID() == Intrinsic::masked_gather && diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/MVETailPredication.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/MVETailPredication.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/MVETailPredication.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/MVETailPredication.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -205,6 +205,10 @@ EnableTailPredication == TailPredication::ForceEnabled; Value *ElemCount = ActiveLaneMask->getOperand(1); + bool Changed = false; + if (!L->makeLoopInvariant(ElemCount, Changed)) + return false; + auto *EC= SE->getSCEV(ElemCount); auto *TC = SE->getSCEV(TripCount); int VectorWidth = diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp 2021-07-09 07:05:00.000000000 +0000 @@ -107,6 +107,12 @@ NumInstrsSteppedOver = 0; while (Iter != EndIter) { + if (Iter->isDebugInstr()) { + // Skip debug instructions + ++Iter; + continue; + } + NextPred = getVPTInstrPredicate(*Iter, PredReg); assert(NextPred != ARMVCC::Else && "VPT block pass does not expect Else preds"); @@ -170,6 +176,8 @@ LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = std::next(BlockBeg); AddedInstIter != Iter; ++AddedInstIter) { + if (AddedInstIter->isDebugInstr()) + continue; dbgs() << " adding: "; AddedInstIter->dump(); }); @@ -197,7 +205,7 @@ if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) break; - LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump();); + LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump()); // Record the new size of the block BlockSize += ElseInstCnt; @@ -211,6 +219,9 @@ // Note that we are using "Iter" to iterate over the block so we can update // it at the same time. 
for (; Iter != VPNOTBlockEndIter; ++Iter) { + if (Iter->isDebugInstr()) + continue; + // Find the register in which the predicate is int OpIdx = findFirstVPTPredOperandIdx(*Iter); assert(OpIdx != -1); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFMIPeephole.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFMIPeephole.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFMIPeephole.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFMIPeephole.cpp 2021-07-09 07:05:00.000000000 +0000 @@ -475,6 +475,9 @@ if (MI.getOpcode() == BPF::SRL_ri && MI.getOperand(2).getImm() == 32) { SrcReg = MI.getOperand(1).getReg(); + if (!MRI->hasOneNonDBGUse(SrcReg)) + continue; + MI2 = MRI->getVRegDef(SrcReg); DstReg = MI.getOperand(0).getReg(); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFPreserveDIType.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFPreserveDIType.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFPreserveDIType.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFPreserveDIType.cpp 2021-07-09 07:05:00.000000000 +0000 @@ -85,8 +85,17 @@ } else { Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE; DIType *Ty = cast(MD); + while (auto *DTy = dyn_cast(Ty)) { + unsigned Tag = DTy->getTag(); + if (Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type) + break; + Ty = DTy->getBaseType(); + } + if (Ty->getName().empty()) report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc"); + MD = Ty; } BasicBlock *BB = Call->getParent(); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFTargetMachine.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFTargetMachine.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFTargetMachine.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFTargetMachine.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -12,6 +12,7 @@ #include "BPFTargetMachine.h" #include "BPF.h" +#include "BPFTargetTransformInfo.h" #include "MCTargetDesc/BPFMCAsmInfo.h" #include "TargetInfo/BPFTargetInfo.h" #include "llvm/CodeGen/Passes.h" @@ -145,6 +146,11 @@ TargetPassConfig::addIRPasses(); } +TargetTransformInfo +BPFTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(BPFTTIImpl(this, F)); +} + // Install an instruction selector pass using // the ISelDag to gen BPF code. 
bool BPFPassConfig::addInstSelector() { diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFTargetMachine.h llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFTargetMachine.h --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFTargetMachine.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFTargetMachine.h 2021-07-09 07:09:48.000000000 +0000 @@ -34,6 +34,8 @@ TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFTargetTransformInfo.h llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFTargetTransformInfo.h --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BPFTargetTransformInfo.h 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BPFTargetTransformInfo.h 2021-07-09 07:09:48.000000000 +0000 @@ -0,0 +1,61 @@ +//===------ BPFTargetTransformInfo.h - BPF specific TTI ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file uses the target's specific information to +// provide more precise answers to certain TTI queries, while letting the +// target independent and default TTI implementations handle the rest. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H + +#include "BPFTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" + +namespace llvm { +class BPFTTIImpl : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const BPFSubtarget *ST; + const BPFTargetLowering *TLI; + + const BPFSubtarget *getST() const { return ST; } + const BPFTargetLowering *getTLI() const { return TLI; } + +public: + explicit BPFTTIImpl(const BPFTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { + if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + + return TTI::TCC_Basic; + } + + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const llvm::Instruction *I = nullptr) { + if (Opcode == Instruction::Select) + return SCEVCheapExpansionBudget; + + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BTFDebug.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BTFDebug.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BTFDebug.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BTFDebug.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -371,6 +371,21 @@ } } +BTFTypeFloat::BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName) + : Name(TypeName) { + 
Kind = BTF::BTF_KIND_FLOAT; + BTFType.Info = Kind << 24; + BTFType.Size = roundupToBytes(SizeInBits); +} + +void BTFTypeFloat::completeType(BTFDebug &BDebug) { + if (IsCompleted) + return; + IsCompleted = true; + + BTFType.NameOff = BDebug.addString(Name); +} + uint32_t BTFStringTable::addString(StringRef S) { // Check whether the string already exists. for (auto &OffsetM : OffsetToIdMap) { @@ -409,18 +424,28 @@ } void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) { - // Only int types are supported in BTF. + // Only int and binary floating point types are supported in BTF. uint32_t Encoding = BTy->getEncoding(); - if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed && - Encoding != dwarf::DW_ATE_signed_char && - Encoding != dwarf::DW_ATE_unsigned && - Encoding != dwarf::DW_ATE_unsigned_char) + std::unique_ptr TypeEntry; + switch (Encoding) { + case dwarf::DW_ATE_boolean: + case dwarf::DW_ATE_signed: + case dwarf::DW_ATE_signed_char: + case dwarf::DW_ATE_unsigned: + case dwarf::DW_ATE_unsigned_char: + // Create a BTF type instance for this DIBasicType and put it into + // DIToIdMap for cross-type reference check. + TypeEntry = std::make_unique( + Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName()); + break; + case dwarf::DW_ATE_float: + TypeEntry = + std::make_unique(BTy->getSizeInBits(), BTy->getName()); + break; + default: return; + } - // Create a BTF type instance for this DIBasicType and put it into - // DIToIdMap for cross-type reference check. - auto TypeEntry = std::make_unique( - Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName()); TypeId = addType(std::move(TypeEntry), BTy); } @@ -1171,6 +1196,7 @@ if (Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::ExternalLinkage && Linkage != GlobalValue::WeakAnyLinkage && + Linkage != GlobalValue::WeakODRLinkage && Linkage != GlobalValue::ExternalWeakLinkage) continue; @@ -1199,8 +1225,8 @@ const DataLayout &DL = Global.getParent()->getDataLayout(); uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType()); - DataSecEntries[std::string(SecName)]->addVar(VarId, Asm->getSymbol(&Global), - Size); + DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId, + Asm->getSymbol(&Global), Size); } } @@ -1278,7 +1304,19 @@ uint8_t Scope = BTF::FUNC_EXTERN; auto FuncTypeEntry = std::make_unique(SP->getName(), ProtoTypeId, Scope); - addType(std::move(FuncTypeEntry)); + uint32_t FuncId = addType(std::move(FuncTypeEntry)); + if (F->hasSection()) { + StringRef SecName = F->getSection(); + + if (DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) { + DataSecEntries[std::string(SecName)] = + std::make_unique(Asm, std::string(SecName)); + } + + // We really don't know func size, set it to 0. 
+ DataSecEntries[std::string(SecName)]->addDataSecEntry(FuncId, + Asm->getSymbol(F), 0); + } } void BTFDebug::endModule() { diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BTFDebug.h llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BTFDebug.h --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BTFDebug.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BTFDebug.h 2021-07-09 07:09:48.000000000 +0000 @@ -187,7 +187,7 @@ uint32_t getSize() override { return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size(); } - void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) { + void addDataSecEntry(uint32_t Id, const MCSymbol *Sym, uint32_t Size) { Vars.push_back(std::make_tuple(Id, Sym, Size)); } std::string getName() { return Name; } @@ -195,6 +195,15 @@ void emitType(MCStreamer &OS) override; }; +/// Handle binary floating point type. +class BTFTypeFloat : public BTFTypeBase { + StringRef Name; + +public: + BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName); + void completeType(BTFDebug &BDebug) override; +}; + /// String table. class BTFStringTable { /// String table size in bytes. diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BTF.def llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BTF.def --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/BTF.def 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/BTF.def 2021-07-09 07:05:00.000000000 +0000 @@ -30,5 +30,6 @@ HANDLE_BTF_KIND(13, FUNC_PROTO) HANDLE_BTF_KIND(14, VAR) HANDLE_BTF_KIND(15, DATASEC) +HANDLE_BTF_KIND(16, FLOAT) #undef HANDLE_BTF_KIND diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/CMakeLists.txt llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/CMakeLists.txt --- llvm-toolchain-12-12.0.0/llvm/lib/Target/BPF/CMakeLists.txt 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/BPF/CMakeLists.txt 2021-07-09 07:05:00.000000000 +0000 @@ -35,6 +35,7 @@ BTFDebug.cpp LINK_COMPONENTS + Analysis AsmPrinter CodeGen Core @@ -46,6 +47,7 @@ SelectionDAG Support Target + TransformUtils ADD_TO_COMPONENT BPF diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -321,6 +321,12 @@ O << "0, "; printOperand(MI, OpNo, O); return false; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + O << "i"; + return false; case 'U': // Print 'u' for update form. case 'X': // Print 'x' for indexed form. // FIXME: Currently for PowerPC memory operands are always loaded diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -859,15 +859,15 @@ BuildMI(MBB, MBBI, dl, TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 : PPC::PROBED_STACKALLOC_32)) - .addDef(ScratchReg) - .addDef(TempReg) // TempReg stores the old sp. + .addDef(TempReg) + .addDef(ScratchReg) // ScratchReg stores the old sp. 
.addImm(NegFrameSize); // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we // update the ScratchReg to meet the assumption that ScratchReg contains // the NegFrameSize. This solution is rather tricky. if (!HasRedZone) { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) - .addReg(TempReg) + .addReg(ScratchReg) .addReg(SPReg); HasSTUX = true; } @@ -1187,7 +1186,6 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const { - // TODO: Generate CFI instructions. bool isPPC64 = Subtarget.isPPC64(); const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -1219,6 +1218,7 @@ bool HasBP = RegInfo->hasBasePointer(MF); Register BPReg = RegInfo->getBaseRegister(MF); Align MaxAlign = MFI.getMaxAlign(); + bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); // Subroutines to generate .cfi_* directives. auto buildDefCFAReg = [&](MachineBasicBlock &MBB, @@ -1272,212 +1272,221 @@ .addReg(SPReg) .addReg(NegSizeReg); }; - // Used to probe realignment gap [stackptr - (stackptr % align), stackptr) - // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30 - // available and r1 is already copied to r30 which is BPReg. So BPReg stores - // the value of stackptr. - // First we have to probe tail interval whose size is less than probesize, - // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage, - // ScratchReg stores the value of ((stackptr % align) % probesize). Then we - // probe each block sized probesize until stackptr meets - // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized - // as negprobesize. At both stages, TempReg stores the value of - // (stackptr - (stackptr % align)). - auto dynamicProbe = [&](MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, Register ScratchReg, - Register TempReg) { - assert(HasBP && isPPC64 && "Probe alignment part not available"); + // Used to probe stack when realignment is required. + // Note that, according to ABI's requirement, *sp must always equal the + // value of back-chain pointer, only st(w|d)u(x) can be used to update sp. + // Following is pseudo code: + // final_sp = (sp & align) + negframesize; + // neg_gap = final_sp - sp; + // while (neg_gap < negprobesize) { + // stdu fp, negprobesize(sp); + // neg_gap -= negprobesize; + // } + // stdux fp, sp, neg_gap + // + // When HasBP & HasRedZone, back-chain pointer is already saved in BPReg + // before probe code, we don't need to save it, so we get one additional reg + // that can be used to materialize the probesize if needed to use xform. + // Otherwise, we can NOT materialize probesize, so we can only use Dform for + // now. + // + // The allocations are: + // if (HasBP && HasRedZone) { + // r0: materialize the probesize if needed so that we can use xform. + // r12: `neg_gap` + // } else { + // r0: back-chain pointer + // r12: `neg_gap`.
+ // } + auto probeRealignedStack = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register ScratchReg, Register TempReg) { + assert(HasBP && "The function is supposed to have base pointer when its " + "stack is realigned."); assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); - // ScratchReg = stackptr % align - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(BPReg) - .addImm(0) - .addImm(64 - Log2(MaxAlign)); - // TempReg = stackptr - (stackptr % align) - BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg) - .addReg(ScratchReg) - .addReg(BPReg); - // ScratchReg = (stackptr % align) % probesize - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(ScratchReg) - .addImm(0) - .addImm(64 - Log2(ProbeSize)); + + // FIXME: We can eliminate this limitation if we get more infomation about + // which part of redzone are already used. Used redzone can be treated + // probed. But there might be `holes' in redzone probed, this could + // complicate the implementation. + assert(ProbeSize >= Subtarget.getRedZoneSize() && + "Probe size should be larger or equal to the size of red-zone so " + "that red-zone is not clobbered by probing."); + + Register &FinalStackPtr = TempReg; + // FIXME: We only support NegProbeSize materializable by DForm currently. + // When HasBP && HasRedzone, we can use xform if we have an additional idle + // register. + NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); + assert(isInt<16>(NegProbeSize) && + "NegProbeSize should be materializable by DForm"); Register CRReg = PPC::CR0; - // If (stackptr % align) % probesize == 0, we should not generate probe - // code. Layout of output assembly kinda like: + // Layout of output assembly kinda like: // bb.0: // ... - // cmpldi $scratchreg, 0 - // beq bb.2 - // bb.1: # Probe tail interval - // neg $scratchreg, $scratchreg - // stdux $bpreg, r1, $scratchreg + // sub $scratchreg, $finalsp, r1 + // cmpdi $scratchreg, + // bge bb.2 + // bb.1: + // stdu , (r1) + // sub $scratchreg, $scratchreg, negprobesize + // cmpdi $scratchreg, + // blt bb.1 // bb.2: - // - // cmpd r1, $tempreg - // beq bb.4 - // bb.3: # Loop to probe each block - // stdux $bpreg, r1, $scratchreg - // cmpd r1, $tempreg - // bne bb.3 - // bb.4: - // ... + // stdux , r1, $scratchreg MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); - MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeResidualMBB); - MachineBasicBlock *ProbeLoopPreHeaderMBB = - MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB); MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeExitMBB); - // bb.4 - ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); - ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + // bb.2 + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, + BackChainPointer); + if (HasRedZone) + // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg + // to TempReg to satisfy it. 
+ BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) + .addReg(BPReg) + .addReg(BPReg); + ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); + ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + } // bb.0 - BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0); - BuildMI(&MBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeLoopPreHeaderMBB); - MBB.addSuccessor(ProbeResidualMBB); - MBB.addSuccessor(ProbeLoopPreHeaderMBB); + { + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) + .addReg(SPReg) + .addReg(FinalStackPtr); + if (!HasRedZone) + BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(&MBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_GE) + .addReg(CRReg) + .addMBB(ProbeExitMBB); + MBB.addSuccessor(ProbeLoopBodyMBB); + MBB.addSuccessor(ProbeExitMBB); + } // bb.1 - BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg) - .addReg(ScratchReg); - allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg, - false, BPReg); - ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB); - // bb.2 - MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(), - NegProbeSize, ScratchReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg) - .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeExitMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB); - // bb.3 - allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg, - false, BPReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg) - .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_NE) - .addReg(CRReg) - .addMBB(ProbeLoopBodyMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, + 0, true /*UseDForm*/, BackChainPointer); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), + ScratchReg) + .addReg(ScratchReg) + .addImm(-NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), + CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_LT) + .addReg(CRReg) + .addMBB(ProbeLoopBodyMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + } // Update liveins. - recomputeLiveIns(*ProbeResidualMBB); - recomputeLiveIns(*ProbeLoopPreHeaderMBB); recomputeLiveIns(*ProbeLoopBodyMBB); recomputeLiveIns(*ProbeExitMBB); return ProbeExitMBB; }; // For case HasBP && MaxAlign > 1, we have to realign the SP by performing - // SP = SP - SP % MaxAlign. + // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since + // the offset subtracted from SP is determined by SP's runtime value. if (HasBP && MaxAlign > 1) { - // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in - // 64-bit mode. - if (isPPC64) { - // Use BPReg to calculate CFA. 
- if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, BPReg, 0); - // Since we have SPReg copied to BPReg at the moment, FPReg can be used as - // TempReg. - Register TempReg = FPReg; - CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg); - // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) - .addReg(BPReg) - .addReg(BPReg); - } else { - // Initialize current frame pointer. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) + // Calculate final stack pointer. + if (isPPC64) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) .addReg(SPReg) - .addReg(SPReg); - // Use FPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); + .addImm(0) + .addImm(64 - Log2(MaxAlign)); + else BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) - .addReg(FPReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg) - .addReg(ScratchReg) - .addReg(SPReg); - } + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), + FPReg) + .addReg(ScratchReg) + .addReg(SPReg); + MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FPReg) + .addReg(ScratchReg) + .addReg(FPReg); + CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); + if (needsCFI) + buildDefCFAReg(*CurrentMBB, {MI}, FPReg); } else { // Initialize current frame pointer. BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); // Use FPReg to calculate CFA. if (needsCFI) buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); - } - // Probe residual part. - if (NegResidualSize) { - bool ResidualUseDForm = CanUseDForm(NegResidualSize); - if (!ResidualUseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); - allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, - ResidualUseDForm, FPReg); - } - bool UseDForm = CanUseDForm(NegProbeSize); - // If number of blocks is small, just probe them directly. - if (NumBlocks < 3) { - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - for (int i = 0; i < NumBlocks; ++i) - allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, - FPReg); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*CurrentMBB, {MI}, SPReg); - } - } else { - // Since CTR is a volatile register and current shrinkwrap implementation - // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a - // CTR loop to probe. - // Calculate trip count and stores it in CTRReg. - MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) - .addReg(ScratchReg, RegState::Kill); - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - // Create MBBs of the loop. - MachineFunction::iterator MBBInsertPoint = - std::next(CurrentMBB->getIterator()); - MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, LoopMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ExitMBB); - // Synthesize the loop body. - allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, - UseDForm, FPReg); - BuildMI(LoopMBB, DL, TII.get(isPPC64 ? 
PPC::BDNZ8 : PPC::BDNZ)) - .addMBB(LoopMBB); - LoopMBB->addSuccessor(ExitMBB); - LoopMBB->addSuccessor(LoopMBB); - // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), CurrentMBB, - std::next(MachineBasicBlock::iterator(MI)), - CurrentMBB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); - CurrentMBB->addSuccessor(LoopMBB); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + // Probe residual part. + if (NegResidualSize) { + bool ResidualUseDForm = CanUseDForm(NegResidualSize); + if (!ResidualUseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); + allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); + } + bool UseDForm = CanUseDForm(NegProbeSize); + // If number of blocks is small, just probe them directly. + if (NumBlocks < 3) { + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + for (int i = 0; i < NumBlocks; ++i) + allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, + FPReg); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + } + } else { + // Since CTR is a volatile register and current shrinkwrap implementation + // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a + // CTR loop to probe. + // Calculate trip count and stores it in CTRReg. + MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) + .addReg(ScratchReg, RegState::Kill); + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + // Create MBBs of the loop. + MachineFunction::iterator MBBInsertPoint = + std::next(CurrentMBB->getIterator()); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, LoopMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ExitMBB); + // Synthesize the loop body. + allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, + UseDForm, FPReg); + BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) + .addMBB(LoopMBB); + LoopMBB->addSuccessor(ExitMBB); + LoopMBB->addSuccessor(LoopMBB); + // Synthesize the exit MBB. + ExitMBB->splice(ExitMBB->end(), CurrentMBB, + std::next(MachineBasicBlock::iterator(MI)), + CurrentMBB->end()); + ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); + CurrentMBB->addSuccessor(LoopMBB); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + } + // Update liveins. + recomputeLiveIns(*LoopMBB); + recomputeLiveIns(*ExitMBB); } - // Update liveins. - recomputeLiveIns(*LoopMBB); - recomputeLiveIns(*ExitMBB); } ++NumPrologProbed; MI.eraseFromParent(); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -167,6 +167,10 @@ // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + // Custom lower inline assembly to check for special registers. 
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); @@ -3461,6 +3465,57 @@ return Op.getOperand(0); } +SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>(); + + assert((Op.getOpcode() == ISD::INLINEASM || + Op.getOpcode() == ISD::INLINEASM_BR) && + "Expecting Inline ASM node."); + + // If an LR store is already known to be required then there is no point in + // checking this ASM as well. + if (MFI.isLRStoreRequired()) + return Op; + + // Inline ASM nodes have an optional last operand that is an incoming Flag of + // type MVT::Glue. We want to ignore this last operand if that is the case. + unsigned NumOps = Op.getNumOperands(); + if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue) + --NumOps; + + // Check all operands that may contain the LR. + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: + llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Mem: + i += NumVals; + break; + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg(); + if (Reg != PPC::LR && Reg != PPC::LR8) + continue; + MFI.setLRStoreRequired(); + return Op; + } + break; + } + } + } + + return Op; +} + SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isAIXABI()) @@ -10316,6 +10371,8 @@ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::INLINEASM: + case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); // Variable argument lowering. case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); @@ -15090,19 +15147,45 @@ return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); + } else if (Constraint == "lr") { + if (VT == MVT::i64) + return std::make_pair(0U, &PPC::LR8RCRegClass); + else + return std::make_pair(0U, &PPC::LRRCRegClass); } - // If we name a VSX register, we can't defer to the base class because it - // will not recognize the correct register (their names will be VSL{0-31} - // and V{0-31} so they won't match). So we match them here. - if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { - int VSNum = atoi(Constraint.data() + 3); - assert(VSNum >= 0 && VSNum <= 63 && - "Attempted to access a vsr out of range"); - if (VSNum < 32) - return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); - return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); + // Handle special cases of physical registers that are not properly handled + // by the base class. + if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') { + // If we name a VSX register, we can't defer to the base class because it + // will not recognize the correct register (their names will be VSL{0-31} + // and V{0-31} so they won't match).
So we match them here. + if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { + int VSNum = atoi(Constraint.data() + 3); + assert(VSNum >= 0 && VSNum <= 63 && + "Attempted to access a vsr out of range"); + if (VSNum < 32) + return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); + return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); + } + + // For float registers, we can't defer to the base class as it will match + // the SPILLTOVSRRC class. + if (Constraint.size() > 3 && Constraint[1] == 'f') { + int RegNum = atoi(Constraint.data() + 2); + if (RegNum > 31 || RegNum < 0) + report_fatal_error("Invalid floating point register number"); + if (VT == MVT::f32 || VT == MVT::i32) + return Subtarget.hasSPE() + ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass) + : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass); + if (VT == MVT::f64 || VT == MVT::i64) + return Subtarget.hasSPE() + ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass) + : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass); + } } + std::pair R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCISelLowering.h llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCISelLowering.h --- llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCISelLowering.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCISelLowering.h 2021-07-09 07:09:48.000000000 +0000 @@ -1128,6 +1128,7 @@ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCRegisterInfo.td llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCRegisterInfo.td --- llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCRegisterInfo.td 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCRegisterInfo.td 2021-07-09 07:09:48.000000000 +0000 @@ -173,7 +173,7 @@ foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { def VSRp#!srl(Index, 1) : VSRPair("VSL"#Index), !cast("VSL"#!add(Index, 1))]>, - DwarfRegNum<[0, 0]>; + DwarfRegNum<[-1, -1]>; } // VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63). 
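The "lr" constraint handling and the LowerINLINEASM hook added to PPCISelLowering.cpp above only trigger when user code names the link register in an asm statement. A minimal sketch of such code, using ordinary GNU inline-asm syntax (illustrative only, not part of this patch; the function name is made up):

// Hypothetical user code: the "lr" clobber is what the new INLINEASM custom
// lowering walks the operand list to find; once it sees PPC::LR/LR8 it calls
// setLRStoreRequired() so the prologue saves and restores the link register.
long touches_lr(long x) {
  asm volatile("" : "+r"(x) : : "lr");
  return x;
}

Without that check, such a clobber could leave LR unsaved in a function that otherwise had no reason to spill it.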
@@ -181,7 +181,7 @@ def VSRp#!add(!srl(Index, 1), 16) : VSRPair("V"#Index), !cast("V"#!add(Index, 1))]>, - DwarfRegNum<[0, 0]>; + DwarfRegNum<[-1, -1]>; } } @@ -409,20 +409,27 @@ let isAllocatable = 0; } +def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> { + let isAllocatable = 0; +} +def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> { + let isAllocatable = 0; +} + def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { let CopyCost = -1; } let SubRegIndices = [sub_pair0, sub_pair1] in { - def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>; - def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>; - def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>; - def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>; - def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>; - def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>; - def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>; - def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>; + def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; } def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, ACC4, ACC5, ACC6, ACC7)> { @@ -430,14 +437,14 @@ } let SubRegIndices = [sub_pair0, sub_pair1] in { - def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>; - def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>; - def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>; - def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>; - def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>; - def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>; - def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>; - def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>; + def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; } def UACCRC : RegisterClass<"PPC", [v512i1], 128, (add UACC0, UACC1, UACC2, UACC3, diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1212,6 +1212,27 @@ return BaseT::getIntrinsicInstrCost(ICA, CostKind); } +bool PPCTTIImpl::areFunctionArgsABICompatible( 
+ const Function *Caller, const Function *Callee, + SmallPtrSetImpl &Args) const { + + // We need to ensure that argument promotion does not + // attempt to promote pointers to MMA types (__vector_pair + // and __vector_quad) since these types explicitly cannot be + // passed as arguments. Both of these types are larger than + // the 128-bit Altivec vectors and have a scalar size of 1 bit. + if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) + return false; + + return llvm::none_of(Args, [](Argument *A) { + auto *EltTy = cast(A->getType())->getElementType(); + if (EltTy->isSized()) + return (EltTy->isIntOrIntVectorTy(1) && + EltTy->getPrimitiveSizeInBits() > 128); + return false; + }); +} + bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) { diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- llvm-toolchain-12-12.0.0/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h 2021-07-09 07:09:48.000000000 +0000 @@ -129,6 +129,9 @@ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); + bool areFunctionArgsABICompatible(const Function *Caller, + const Function *Callee, + SmallPtrSetImpl &Args) const; /// @} }; diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td llvm-toolchain-12-12.0.1/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- llvm-toolchain-12-12.0.0/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td 2021-07-09 07:09:48.000000000 +0000 @@ -3909,10 +3909,10 @@ (DecImm simm5_plus1:$rs2), GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0), + def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), - (vti.Mask VR:$merge), + (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK") VR:$merge, @@ -3922,17 +3922,17 @@ GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0), + def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), - (vti.Mask VR:$merge), + (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK") VR:$merge, @@ -3950,11 +3950,11 @@ vti.RegClass:$rs1, GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar 0), - (vti.Mask VR:$merge), - (XLenVT (VLOp GPR:$vl)))), + def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar 0), + (vti.Mask V0), + (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK") VR:$merge, vti.RegClass:$rs1, diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 
llvm-toolchain-12-12.0.1/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -285,10 +285,13 @@ // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); - // FIXME: Can we support these natively? + // Expand 128 bit shifts without using a libcall. setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); // We have native instructions for i8, i16 and i32 extensions, but not i1. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -1547,6 +1550,7 @@ bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(MF.getDataLayout()); + LLVMContext &Ctx = *DAG.getContext(); // Detect unsupported vector argument and return types. if (Subtarget.hasVector()) { @@ -1556,7 +1560,7 @@ // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; - SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected @@ -1581,14 +1585,25 @@ if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. - SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT); + unsigned ArgIndex = Outs[I].OrigArgIndex; + EVT SlotVT; + if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { + // Allocate the full stack space for a promoted (and split) argument. + Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; + EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); + MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); + } else { + SlotVT = Outs[I].ArgVT; + } + SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); int FI = cast(SpillSlot)->getIndex(); MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, SpillSlot, MachinePointerInfo::getFixedStack(MF, FI))); // If the original argument was split (e.g. i128), we need // to store all parts of it here (and pass just one address). - unsigned ArgIndex = Outs[I].OrigArgIndex; assert (Outs[I].PartOffset == 0); while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { SDValue PartValue = OutVals[I + 1]; @@ -1598,6 +1613,8 @@ MemOpChains.push_back( DAG.getStore(Chain, DL, PartValue, Address, MachinePointerInfo::getFixedStack(MF, FI))); + assert((PartOffset + PartValue.getValueType().getStoreSize() <= + SlotVT.getStoreSize()) && "Not enough space for argument part!"); ++I; } ArgValue = SpillSlot; @@ -1691,7 +1708,7 @@ // Assign locations to each value returned by this call. SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. 
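The SystemZ hunk above expands 128-bit shifts inline by clearing the shift libcall names (e.g. __lshrti3), so the legalizer can no longer fall back to a runtime call. A small illustrative function that exercises this path when built for s390x (not taken from the patch):

// 128-bit logical right shift; with SRL_PARTS expanded and the libcall name
// cleared, this is lowered to an inline instruction sequence rather than a
// call into the compiler runtime.
unsigned __int128 shr128(unsigned __int128 x, unsigned n) {
  return x >> (n & 127);
}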
diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -885,16 +885,9 @@ SmallVector FMCArgs; for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) { Constant *Clause = LPI->getClause(I); - // As a temporary workaround for the lack of aggregate varargs support - // in the interface between JS and wasm, break out filter operands into - // their component elements. - if (LPI->isFilter(I)) { - auto *ATy = cast(Clause->getType()); - for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) { - Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter"); - FMCArgs.push_back(EV); - } - } else + // TODO Handle filters (= exception specifications). + // https://bugs.llvm.org/show_bug.cgi?id=50396 + if (LPI->isCatch(I)) FMCArgs.push_back(Clause); } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/X86/X86InstrCompiler.td llvm-toolchain-12-12.0.1/llvm/lib/Target/X86/X86InstrCompiler.td --- llvm-toolchain-12-12.0.0/llvm/lib/Target/X86/X86InstrCompiler.td 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/X86/X86InstrCompiler.td 2021-07-09 07:09:48.000000000 +0000 @@ -1344,15 +1344,18 @@ // Any instruction that defines a 32-bit result leaves the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. Any other 32-bit operation will zero-extend -// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper -// 32 bits, they're probably just qualifying a CopyFromReg. +// be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying +// anything about the upper 32 bits, they're probably just qualifying a +// CopyFromReg. FREEZE may be coming from a a truncate. Any other 32-bit +// operation will zero-extend up to 64 bits. def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg && N->getOpcode() != ISD::AssertSext && - N->getOpcode() != ISD::AssertZext; + N->getOpcode() != ISD::AssertZext && + N->getOpcode() != ISD::AssertAlign && + N->getOpcode() != ISD::FREEZE; }]>; // In the case of a 32-bit def that is known to implicitly zero-extend, diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Target/X86/X86ISelLowering.cpp llvm-toolchain-12-12.0.1/llvm/lib/Target/X86/X86ISelLowering.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Target/X86/X86ISelLowering.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Target/X86/X86ISelLowering.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -37889,6 +37889,8 @@ // replicating low and high halves (and without changing the type/length of // the vector), we don't need the shuffle. 
if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) { + if (Opcode == X86ISD::VBROADCAST && !VT.is128BitVector()) + return SDValue(); if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) { // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Coroutines/CoroEarly.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Coroutines/CoroEarly.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Coroutines/CoroEarly.cpp 2021-02-17 08:14:29.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Coroutines/CoroEarly.cpp 2021-07-09 07:05:00.000000000 +0000 @@ -149,6 +149,7 @@ bool Changed = false; CoroIdInst *CoroId = nullptr; SmallVector<CoroFreeInst *, 4> CoroFrees; + bool HasCoroSuspend = false; for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) { Instruction &I = *IB++; if (auto *CB = dyn_cast<CallBase>(&I)) { @@ -163,6 +164,7 @@ // pass expects that there is at most one final suspend point. if (cast<CoroSuspendInst>(&I)->isFinal()) CB->setCannotDuplicate(); + HasCoroSuspend = true; break; case Intrinsic::coro_end_async: case Intrinsic::coro_end: @@ -213,6 +215,13 @@ if (CoroId) for (CoroFreeInst *CF : CoroFrees) CF->setArgOperand(0, CoroId); + // Coroutine suspension could potentially lead to any argument modified + // outside of the function, hence arguments should not have noalias + // attributes. + if (HasCoroSuspend) + for (Argument &A : F.args()) + if (A.hasNoAliasAttr()) + A.removeAttr(Attribute::NoAlias); return Changed; } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Coroutines/CoroFrame.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Coroutines/CoroFrame.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Coroutines/CoroFrame.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -781,7 +781,13 @@ PromiseAlloca, DenseMap>{}, false); // Create an entry for every spilled value. for (auto &S : FrameData.Spills) { - FieldIDType Id = B.addField(S.first->getType(), None); + Type *FieldType = S.first->getType(); + // For byval arguments, we need to store the pointed value in the frame, + // instead of the pointer itself. + if (const Argument *A = dyn_cast<Argument>(S.first)) + if (A->hasByValAttr()) + FieldType = FieldType->getPointerElementType(); + FieldIDType Id = B.addField(FieldType, None); FrameData.setFieldIndex(S.first, Id); } @@ -1149,6 +1155,7 @@ // Create a store instruction storing the value into the // coroutine frame. Instruction *InsertPt = nullptr; + bool NeedToCopyArgPtrValue = false; if (auto *Arg = dyn_cast<Argument>(Def)) { // For arguments, we will place the store instruction right after // the coroutine frame pointer instruction, i.e. bitcast of // from the coroutine function. Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); + if (Arg->hasByValAttr()) + NeedToCopyArgPtrValue = true; + } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) { // Don't spill immediately after a suspend; splitting assumes // that the suspend will be followed by a branch. @@ -1193,7 +1203,15 @@ Builder.SetInsertPoint(InsertPt); auto *G = Builder.CreateConstInBoundsGEP2_32( FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr")); - Builder.CreateStore(Def, G); + if (NeedToCopyArgPtrValue) { + // For byval arguments, we need to store the pointed value in the frame, + // instead of the pointer itself.
+ auto *Value = + Builder.CreateLoad(Def->getType()->getPointerElementType(), Def); + Builder.CreateStore(Value, G); + } else { + Builder.CreateStore(Def, G); + } BasicBlock *CurrentBlock = nullptr; Value *CurrentReload = nullptr; @@ -1207,9 +1225,12 @@ auto *GEP = GetFramePointer(E.first); GEP->setName(E.first->getName() + Twine(".reload.addr")); - CurrentReload = Builder.CreateLoad( - FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, - E.first->getName() + Twine(".reload")); + if (NeedToCopyArgPtrValue) + CurrentReload = GEP; + else + CurrentReload = Builder.CreateLoad( + FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, + E.first->getName() + Twine(".reload")); TinyPtrVector DIs = FindDbgDeclareUses(Def); for (DbgDeclareInst *DDI : DIs) { diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -3221,11 +3221,6 @@ } } - // ~(X - Y) --> ~X + Y - if (match(NotVal, m_Sub(m_Value(X), m_Value(Y)))) - if (isa(X) || NotVal->hasOneUse()) - return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y); - // ~(~X >>s Y) --> (X >>s Y) if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y)))) return BinaryOperator::CreateAShr(X, Y); @@ -3256,9 +3251,15 @@ return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y); } - // ~(X + C) --> -(C + 1) - X - if (match(Op0, m_Add(m_Value(X), m_Constant(C)))) - return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X); + // ~(X + C) --> ~C - X + if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C)))) + return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X); + + // ~(X - Y) --> ~X + Y + // FIXME: is it really beneficial to sink the `not` here? + if (match(NotVal, m_Sub(m_Value(X), m_Value(Y)))) + if (isa(X) || NotVal->hasOneUse()) + return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y); // ~(~X + Y) --> X - Y if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y)))) diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1095,7 +1095,10 @@ /// TODO: Wrapping flags could be preserved in some cases with better analysis. Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp) { - if (!Cmp.isEquality()) + // Value equivalence substitution requires an all-or-nothing replacement. + // It does not make sense for a vector compare where each lane is chosen + // independently. + if (!Cmp.isEquality() || Cmp.getType()->isVectorTy()) return nullptr; // Canonicalize the pattern to ICMP_EQ by swapping the select operands. 
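The rewritten ~(X + C) --> ~C - X fold above rests on the two's-complement identity ~(x + c) == ~c - x (since ~v == -v - 1). A quick self-contained spot check of that identity, for illustration only:

// Checks that ~(x + c) and ~c - x agree modulo 2^32, which is the algebra
// behind the new CreateSub(ConstantExpr::getNot(C), X) form.
#include <cassert>
#include <cstdint>
#include <initializer_list>
int main() {
  for (uint32_t x : {0u, 3u, 0x7fffffffu, 0xdeadbeefu})
    for (uint32_t c : {0u, 42u, 0xffffffffu})
      assert(~(x + c) == ~c - x);
  return 0;
}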
diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp 2021-04-15 05:53:49.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -21,6 +21,30 @@ #define DEBUG_TYPE "instcombine" +bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1, + Value *ShAmt1) { + // We have two shift amounts from two different shifts. The types of those + // shift amounts may not match. If that's the case let's bailout now.. + if (ShAmt0->getType() != ShAmt1->getType()) + return false; + + // As input, we have the following pattern: + // Sh0 (Sh1 X, Q), K + // We want to rewrite that as: + // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) + // While we know that originally (Q+K) would not overflow + // (because 2 * (N-1) u<= iN -1), we have looked past extensions of + // shift amounts. so it may now overflow in smaller bitwidth. + // To ensure that does not happen, we need to ensure that the total maximal + // shift amount is still representable in that smaller bit width. + unsigned MaximalPossibleTotalShiftAmount = + (Sh0->getType()->getScalarSizeInBits() - 1) + + (Sh1->getType()->getScalarSizeInBits() - 1); + APInt MaximalRepresentableShiftAmount = + APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); + return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount); +} + // Given pattern: // (x shiftopcode Q) shiftopcode K // we should rewrite it as @@ -57,26 +81,8 @@ if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1))))) return nullptr; - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now.. - if (ShAmt0->getType() != ShAmt1->getType()) - return nullptr; - - // As input, we have the following pattern: - // Sh0 (Sh1 X, Q), K - // We want to rewrite that as: - // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) - // While we know that originally (Q+K) would not overflow - // (because 2 * (N-1) u<= iN -1), we have looked past extensions of - // shift amounts. so it may now overflow in smaller bitwidth. - // To ensure that does not happen, we need to ensure that the total maximal - // shift amount is still representable in that smaller bit width. - unsigned MaximalPossibleTotalShiftAmount = - (Sh0->getType()->getScalarSizeInBits() - 1) + - (Sh1->getType()->getScalarSizeInBits() - 1); - APInt MaximalRepresentableShiftAmount = - APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); - if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount)) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1)) return nullptr; // We are only looking for signbit extraction if we have two right shifts. @@ -220,9 +226,9 @@ // Peek through an optional zext of the shift amount. match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt))); - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now. - if (MaskShAmt->getType() != ShiftShAmt->getType()) + // Verify that it would be safe to try to add those two shift amounts. 
+ if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, + MaskShAmt)) return nullptr; // Can we simplify (MaskShAmt+ShiftShAmt) ? @@ -252,9 +258,9 @@ // Peek through an optional zext of the shift amount. match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt))); - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now. - if (MaskShAmt->getType() != ShiftShAmt->getType()) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, + MaskShAmt)) return nullptr; // Can we simplify (ShiftShAmt-MaskShAmt) ? diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/IPO/ConstantMerge.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/IPO/ConstantMerge.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/IPO/ConstantMerge.cpp 2020-10-16 21:13:09.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/IPO/ConstantMerge.cpp 2021-07-09 07:05:00.000000000 +0000 @@ -95,6 +95,8 @@ // Only process constants with initializers in the default address space. return !GV->isConstant() || !GV->hasDefinitiveInitializer() || GV->getType()->getAddressSpace() != 0 || GV->hasSection() || + // Don't touch thread-local variables. + GV->isThreadLocal() || // Don't touch values marked with attribute(used). UsedGlobals.count(GV); } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -1081,6 +1081,12 @@ DenseSet V; collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V); for (auto *I : V) { + if (I->mayHaveSideEffects()) { + LLVM_DEBUG(dbgs() << "LRR: Aborting - " + << "An instruction which does not belong to any root " + << "sets must not have side effects: " << *I); + return false; + } Uses[I].set(IL_All); } diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -43,10 +43,10 @@ "Number of 'objectsize' intrinsic calls handled"); static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) { - Value *Op = II->getOperand(0); - - return isa(Op) ? 
ConstantInt::getTrue(II->getType()) - : ConstantInt::getFalse(II->getType()); + if (auto *C = dyn_cast(II->getOperand(0))) + if (C->isManifestConstant()) + return ConstantInt::getTrue(II->getType()); + return ConstantInt::getFalse(II->getType()); } static bool replaceConditionalBranchesOnConstant(Instruction *II, diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Scalar/SCCP.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Scalar/SCCP.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Scalar/SCCP.cpp 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Scalar/SCCP.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -542,9 +542,14 @@ auto Iter = AdditionalUsers.find(I); if (Iter != AdditionalUsers.end()) { + // Copy additional users before notifying them of changes, because new + // users may be added, potentially invalidating the iterator. + SmallVector ToNotify; for (User *U : Iter->second) if (auto *UI = dyn_cast(U)) - OperandChangedState(UI); + ToNotify.push_back(UI); + for (Instruction *UI : ToNotify) + OperandChangedState(UI); } } void handleCallOverdefined(CallBase &CB); diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Utils/InlineFunction.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Utils/InlineFunction.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Utils/InlineFunction.cpp 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Utils/InlineFunction.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -780,7 +780,8 @@ /// When inlining a call site that has !llvm.mem.parallel_loop_access, /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should /// be propagated to all memory-accessing cloned instructions. -static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) { +static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, + Function::iterator FEnd) { MDNode *MemParallelLoopAccess = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); @@ -789,41 +790,33 @@ if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) return; - for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); - VMI != VMIE; ++VMI) { - // Check that key is an instruction, to skip the Argument mapping, which - // points to an instruction in the original function, not the inlined one. - if (!VMI->second || !isa(VMI->first)) - continue; - - Instruction *NI = dyn_cast(VMI->second); - if (!NI) - continue; - - // This metadata is only relevant for instructions that access memory. - if (!NI->mayReadOrWriteMemory()) - continue; + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // This metadata is only relevant for instructions that access memory. + if (!I.mayReadOrWriteMemory()) + continue; - if (MemParallelLoopAccess) { - // TODO: This probably should not overwrite MemParalleLoopAccess. - MemParallelLoopAccess = MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access), - MemParallelLoopAccess); - NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, + if (MemParallelLoopAccess) { + // TODO: This probably should not overwrite MemParalleLoopAccess. 
+ MemParallelLoopAccess = MDNode::concatenate( + I.getMetadata(LLVMContext::MD_mem_parallel_loop_access), + MemParallelLoopAccess); + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, MemParallelLoopAccess); - } + } - if (AccessGroup) - NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( - NI->getMetadata(LLVMContext::MD_access_group), AccessGroup)); - - if (AliasScope) - NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope)); - - if (NoAlias) - NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_noalias), NoAlias)); + if (AccessGroup) + I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( + I.getMetadata(LLVMContext::MD_access_group), AccessGroup)); + + if (AliasScope) + I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_alias_scope), AliasScope)); + + if (NoAlias) + I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_noalias), NoAlias)); + } } } @@ -844,9 +837,9 @@ /// subsequent remap() calls. void clone(); - /// Remap instructions in the given VMap from the original to the cloned + /// Remap instructions in the given range from the original to the cloned /// metadata. - void remap(ValueToValueMapTy &VMap); + void remap(Function::iterator FStart, Function::iterator FEnd); }; ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( @@ -907,34 +900,27 @@ } } -void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) { +void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, + Function::iterator FEnd) { if (MDMap.empty()) return; // Nothing to do. - for (auto Entry : VMap) { - // Check that key is an instruction, to skip the Argument mapping, which - // points to an instruction in the original function, not the inlined one. - if (!Entry->second || !isa(Entry->first)) - continue; - - Instruction *I = dyn_cast(Entry->second); - if (!I) - continue; - - // Only update scopes when we find them in the map. If they are not, it is - // because we already handled that instruction before. This is faster than - // tracking which instructions we already updated. - if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope)) - if (MDNode *MNew = MDMap.lookup(M)) - I->setMetadata(LLVMContext::MD_alias_scope, MNew); - - if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias)) - if (MDNode *MNew = MDMap.lookup(M)) - I->setMetadata(LLVMContext::MD_noalias, MNew); - - if (auto *Decl = dyn_cast(I)) - if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) - Decl->setScopeList(MNew); + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // TODO: The null checks for the MDMap.lookup() results should no longer + // be necessary. + if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_alias_scope, MNew); + + if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_noalias, MNew); + + if (auto *Decl = dyn_cast(&I)) + if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) + Decl->setScopeList(MNew); + } } } @@ -1926,7 +1912,7 @@ // Now clone the inlined noalias scope metadata. SAMetadataCloner.clone(); - SAMetadataCloner.remap(VMap); + SAMetadataCloner.remap(FirstNewBlock, Caller->end()); // Add noalias metadata if necessary. 
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); @@ -1936,7 +1922,7 @@ AddReturnAttributes(CB, VMap); // Propagate metadata on the callsite if necessary. - PropagateCallSiteMetadata(CB, VMap); + PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); // Register any cloned assumptions. if (IFI.GetAssumptionCache) diff -Nru llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- llvm-toolchain-12-12.0.0/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp 2021-07-09 07:09:48.000000000 +0000 @@ -925,10 +925,7 @@ bool LoopVectorizationLegality::blockCanBePredicated( BasicBlock *BB, SmallPtrSetImpl &SafePtrs, SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes, - bool PreserveGuards) const { - const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); - + SmallPtrSetImpl &ConditionalAssumes) const { for (Instruction &I : *BB) { // Check that we don't have a constant expression that can trap as operand. for (Value *Operand : I.operands()) { @@ -956,11 +953,7 @@ if (!LI) return false; if (!SafePtrs.count(LI->getPointerOperand())) { - // !llvm.mem.parallel_loop_access implies if-conversion safety. - // Otherwise, record that the load needs (real or emulated) masking - // and let the cost model decide. - if (!IsAnnotatedParallel || PreserveGuards) - MaskedOp.insert(LI); + MaskedOp.insert(LI); continue; } } @@ -1276,8 +1269,7 @@ // do not need predication such as the header block. for (BasicBlock *BB : TheLoop->blocks()) { if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp, - TmpConditionalAssumes, - /* MaskAllLoads= */ true)) { + TmpConditionalAssumes)) { LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n"); return false; } diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,697 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE +; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE + +; Ensure there's no stack spill in between ldxr/stxr pairs. 
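[Annotation, not part of the patch] Background for the new test: on AArch64, a store performed between a load-exclusive (ldaxr/ldaxp) and its paired store-exclusive (stlxr/stlxp) is allowed to clear the exclusive monitor, so a register spill inside the LL/SC loop can make the store-exclusive fail on every iteration and the loop never terminate. The CHECK lines that follow accordingly keep all folded spills/reloads in the outer loop (BB*_1) and restrict the inner loop (BB*_2) to the ldaxr/cmp/stlxr/cbnz sequence. A minimal way to look at the same code shape outside the test suite, assuming a reasonably recent clang (the file name is only illustrative):

    // atomic_o0.cpp -- compile with: clang++ --target=aarch64 -O0 -S atomic_o0.cpp
    #include <atomic>
    int fetch_add(std::atomic<int> *p) {
      // Without +lse this lowers to an ldaxr/stlxr loop like the one checked below.
      return p->fetch_add(1, std::memory_order_seq_cst);
    }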
+ +define i8 @test_rmw_add_8(i8* %dst) { +; NOLSE-LABEL: test_rmw_add_8: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrb w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB0_1 +; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB0_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB0_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB0_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrb w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxtb +; NOLSE-NEXT: b.ne .LBB0_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2 +; NOLSE-NEXT: stlxrb w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB0_2 +; NOLSE-NEXT: .LBB0_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxtb +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB0_1 +; NOLSE-NEXT: b .LBB0_5 +; NOLSE-NEXT: .LBB0_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_8: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddalb w8, w0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i8* %dst, i8 1 seq_cst + ret i8 %res +} + +define i16 @test_rmw_add_16(i16* %dst) { +; NOLSE-LABEL: test_rmw_add_16: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrh w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB1_1 +; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB1_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB1_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB1_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrh w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxth +; NOLSE-NEXT: b.ne .LBB1_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2 +; NOLSE-NEXT: stlxrh w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB1_2 +; NOLSE-NEXT: .LBB1_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxth +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB1_1 +; NOLSE-NEXT: b .LBB1_5 +; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_16: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddalh w8, w0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i16* %dst, i16 1 seq_cst + ret i16 %res +} + +define i32 @test_rmw_add_32(i32* %dst) { +; 
NOLSE-LABEL: test_rmw_add_32: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB2_1 +; NOLSE-NEXT: .LBB2_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB2_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB2_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB2_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr w8, [x11] +; NOLSE-NEXT: cmp w8, w9 +; NOLSE-NEXT: b.ne .LBB2_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2 +; NOLSE-NEXT: stlxr w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB2_2 +; NOLSE-NEXT: .LBB2_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB2_1 +; NOLSE-NEXT: b .LBB2_5 +; NOLSE-NEXT: .LBB2_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_32: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddal w8, w0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i32* %dst, i32 1 seq_cst + ret i32 %res +} + +define i64 @test_rmw_add_64(i64* %dst) { +; NOLSE-LABEL: test_rmw_add_64: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0] +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB3_1 +; NOLSE-NEXT: .LBB3_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB3_2 Depth 2 +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add x12, x9, #1 // =1 +; NOLSE-NEXT: .LBB3_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB3_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr x8, [x11] +; NOLSE-NEXT: cmp x8, x9 +; NOLSE-NEXT: b.ne .LBB3_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2 +; NOLSE-NEXT: stlxr w10, x12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB3_2 +; NOLSE-NEXT: .LBB3_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1 +; NOLSE-NEXT: subs x9, x8, x9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB3_1 +; NOLSE-NEXT: b .LBB3_5 +; NOLSE-NEXT: .LBB3_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_64: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: // kill: def $x8 killed $w8 +; LSE-NEXT: ldaddal x8, x0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i64* %dst, i64 1 seq_cst + ret i64 %res +} + +define i128 @test_rmw_add_128(i128* %dst) { +; NOLSE-LABEL: test_rmw_add_128: 
+; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #48 // =48 +; NOLSE-NEXT: .cfi_def_cfa_offset 48 +; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0, #8] +; NOLSE-NEXT: ldr x9, [x0] +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB4_1 +; NOLSE-NEXT: .LBB4_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB4_2 Depth 2 +; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: adds x14, x8, #1 // =1 +; NOLSE-NEXT: mov x9, xzr +; NOLSE-NEXT: adcs x15, x11, x9 +; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxp x10, x9, [x13] +; NOLSE-NEXT: cmp x10, x8 +; NOLSE-NEXT: cset w12, ne +; NOLSE-NEXT: cmp x9, x11 +; NOLSE-NEXT: cinc w12, w12, ne +; NOLSE-NEXT: cbnz w12, .LBB4_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 +; NOLSE-NEXT: stlxp w12, x14, x15, [x13] +; NOLSE-NEXT: cbnz w12, .LBB4_2 +; NOLSE-NEXT: .LBB4_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 +; NOLSE-NEXT: eor x11, x9, x11 +; NOLSE-NEXT: eor x8, x10, x8 +; NOLSE-NEXT: orr x8, x8, x11 +; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: cbnz x8, .LBB4_1 +; NOLSE-NEXT: b .LBB4_5 +; NOLSE-NEXT: .LBB4_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #48 // =48 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_128: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #80 // =80 +; LSE-NEXT: .cfi_def_cfa_offset 80 +; LSE-NEXT: str x0, [sp, #56] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0, #8] +; LSE-NEXT: ldr x9, [x0] +; LSE-NEXT: str x9, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: b .LBB4_1 +; LSE-NEXT: .LBB4_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload +; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload +; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload +; LSE-NEXT: adds x2, x8, #1 // =1 +; LSE-NEXT: mov x11, xzr +; LSE-NEXT: adcs x11, x10, x11 +; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 +; LSE-NEXT: mov x3, x11 +; LSE-NEXT: mov x0, x8 +; LSE-NEXT: mov x1, x10 +; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill +; LSE-NEXT: caspal x0, x1, x2, x3, [x9] +; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill +; LSE-NEXT: mov x9, x1 +; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: eor x11, x9, x10 +; LSE-NEXT: mov x10, x0 +; LSE-NEXT: str x10, [sp, #48] // 8-byte Folded Spill +; LSE-NEXT: eor x8, x10, x8 +; LSE-NEXT: orr x8, x8, x11 +; LSE-NEXT: str x10, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: cbnz x8, .LBB4_1 +; LSE-NEXT: b .LBB4_2 +; LSE-NEXT: .LBB4_2: // %atomicrmw.end +; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload +; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #80 // =80 +; LSE-NEXT: 
ret +entry: + %res = atomicrmw add i128* %dst, i128 1 seq_cst + ret i128 %res +} +define i8 @test_rmw_nand_8(i8* %dst) { +; NOLSE-LABEL: test_rmw_nand_8: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrb w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB5_1 +; NOLSE-NEXT: .LBB5_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB5_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: .LBB5_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB5_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrb w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxtb +; NOLSE-NEXT: b.ne .LBB5_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2 +; NOLSE-NEXT: stlxrb w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB5_2 +; NOLSE-NEXT: .LBB5_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxtb +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB5_1 +; NOLSE-NEXT: b .LBB5_5 +; NOLSE-NEXT: .LBB5_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_8: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldrb w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB5_1 +; LSE-NEXT: .LBB5_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casalb w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9, uxtb +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b.ne .LBB5_1 +; LSE-NEXT: b .LBB5_2 +; LSE-NEXT: .LBB5_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i8* %dst, i8 1 seq_cst + ret i8 %res +} + +define i16 @test_rmw_nand_16(i16* %dst) { +; NOLSE-LABEL: test_rmw_nand_16: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrh w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB6_1 +; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB6_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB6_1 Depth=1 +; 
NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrh w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxth +; NOLSE-NEXT: b.ne .LBB6_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=2 +; NOLSE-NEXT: stlxrh w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB6_2 +; NOLSE-NEXT: .LBB6_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB6_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxth +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB6_1 +; NOLSE-NEXT: b .LBB6_5 +; NOLSE-NEXT: .LBB6_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_16: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldrh w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB6_1 +; LSE-NEXT: .LBB6_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casalh w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9, uxth +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b.ne .LBB6_1 +; LSE-NEXT: b .LBB6_2 +; LSE-NEXT: .LBB6_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i16* %dst, i16 1 seq_cst + ret i16 %res +} + +define i32 @test_rmw_nand_32(i32* %dst) { +; NOLSE-LABEL: test_rmw_nand_32: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB7_1 +; NOLSE-NEXT: .LBB7_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB7_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: .LBB7_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB7_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr w8, [x11] +; NOLSE-NEXT: cmp w8, w9 +; NOLSE-NEXT: b.ne .LBB7_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2 +; NOLSE-NEXT: stlxr w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB7_2 +; NOLSE-NEXT: .LBB7_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB7_1 +; NOLSE-NEXT: b .LBB7_5 +; NOLSE-NEXT: .LBB7_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: 
test_rmw_nand_32: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldr w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB7_1 +; LSE-NEXT: .LBB7_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casal w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9 +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b.ne .LBB7_1 +; LSE-NEXT: b .LBB7_2 +; LSE-NEXT: .LBB7_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i32* %dst, i32 1 seq_cst + ret i32 %res +} + +define i64 @test_rmw_nand_64(i64* %dst) { +; NOLSE-LABEL: test_rmw_nand_64: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0] +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB8_1 +; NOLSE-NEXT: .LBB8_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB8_2 Depth 2 +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mov w8, w9 +; NOLSE-NEXT: mvn w10, w8 +; NOLSE-NEXT: // implicit-def: $x8 +; NOLSE-NEXT: mov w8, w10 +; NOLSE-NEXT: orr x12, x8, #0xfffffffffffffffe +; NOLSE-NEXT: .LBB8_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB8_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr x8, [x11] +; NOLSE-NEXT: cmp x8, x9 +; NOLSE-NEXT: b.ne .LBB8_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2 +; NOLSE-NEXT: stlxr w10, x12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB8_2 +; NOLSE-NEXT: .LBB8_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1 +; NOLSE-NEXT: subs x9, x8, x9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB8_1 +; NOLSE-NEXT: b .LBB8_5 +; NOLSE-NEXT: .LBB8_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_64: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0] +; LSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: b .LBB8_1 +; LSE-NEXT: .LBB8_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: mvn w10, w8 +; LSE-NEXT: // implicit-def: $x8 +; LSE-NEXT: mov w8, w10 +; LSE-NEXT: orr x10, x8, #0xfffffffffffffffe +; LSE-NEXT: mov x8, x9 +; LSE-NEXT: casal x8, x10, [x11] +; LSE-NEXT: str x8, [sp] // 8-byte Folded Spill +; LSE-NEXT: subs x9, 
x8, x9 +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: b.ne .LBB8_1 +; LSE-NEXT: b .LBB8_2 +; LSE-NEXT: .LBB8_2: // %atomicrmw.end +; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i64* %dst, i64 1 seq_cst + ret i64 %res +} + +define i128 @test_rmw_nand_128(i128* %dst) { +; NOLSE-LABEL: test_rmw_nand_128: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #48 // =48 +; NOLSE-NEXT: .cfi_def_cfa_offset 48 +; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0, #8] +; NOLSE-NEXT: ldr x9, [x0] +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB9_1 +; NOLSE-NEXT: .LBB9_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB9_2 Depth 2 +; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: mov w9, w8 +; NOLSE-NEXT: mvn w10, w9 +; NOLSE-NEXT: // implicit-def: $x9 +; NOLSE-NEXT: mov w9, w10 +; NOLSE-NEXT: orr x14, x9, #0xfffffffffffffffe +; NOLSE-NEXT: mov x15, #-1 +; NOLSE-NEXT: .LBB9_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB9_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxp x10, x9, [x13] +; NOLSE-NEXT: cmp x10, x8 +; NOLSE-NEXT: cset w12, ne +; NOLSE-NEXT: cmp x9, x11 +; NOLSE-NEXT: cinc w12, w12, ne +; NOLSE-NEXT: cbnz w12, .LBB9_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2 +; NOLSE-NEXT: stlxp w12, x14, x15, [x13] +; NOLSE-NEXT: cbnz w12, .LBB9_2 +; NOLSE-NEXT: .LBB9_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1 +; NOLSE-NEXT: eor x11, x9, x11 +; NOLSE-NEXT: eor x8, x10, x8 +; NOLSE-NEXT: orr x8, x8, x11 +; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: cbnz x8, .LBB9_1 +; NOLSE-NEXT: b .LBB9_5 +; NOLSE-NEXT: .LBB9_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #48 // =48 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_128: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #80 // =80 +; LSE-NEXT: .cfi_def_cfa_offset 80 +; LSE-NEXT: str x0, [sp, #56] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0, #8] +; LSE-NEXT: ldr x9, [x0] +; LSE-NEXT: str x9, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: b .LBB9_1 +; LSE-NEXT: .LBB9_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload +; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload +; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload +; LSE-NEXT: mov x0, x8 +; LSE-NEXT: mov x1, x10 +; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill +; LSE-NEXT: mov w11, w8 +; LSE-NEXT: mvn w12, w11 +; LSE-NEXT: // implicit-def: $x11 +; LSE-NEXT: mov w11, w12 +; LSE-NEXT: orr x2, x11, #0xfffffffffffffffe +; LSE-NEXT: mov x11, #-1 +; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 +; LSE-NEXT: mov 
x3, x11 +; LSE-NEXT: caspal x0, x1, x2, x3, [x9] +; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill +; LSE-NEXT: mov x9, x1 +; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: eor x11, x9, x10 +; LSE-NEXT: mov x10, x0 +; LSE-NEXT: str x10, [sp, #48] // 8-byte Folded Spill +; LSE-NEXT: eor x8, x10, x8 +; LSE-NEXT: orr x8, x8, x11 +; LSE-NEXT: str x10, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: cbnz x8, .LBB9_1 +; LSE-NEXT: b .LBB9_2 +; LSE-NEXT: .LBB9_2: // %atomicrmw.end +; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload +; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #80 // =80 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i128* %dst, i128 1 seq_cst + ret i128 %res +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=arm64-apple-ios %s -o - -O0 -global-isel=1 | FileCheck %s +define void @foo(i512 %in) { +; CHECK-LABEL: foo: +; CHECK: cbz + switch i512 %in, label %default [ + i512 3923188584616675477397368389504791510063972152790021570560, label %l1 + i512 3923188584616675477397368389504791510063972152790021570561, label %l2 + i512 3923188584616675477397368389504791510063972152790021570562, label %l3 + ] + +default: + ret void + +l1: + ret void + +l2: + ret void + +l3: + ret void +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir 2021-07-09 07:05:01.000000000 +0000 @@ -1,32 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s - ---- | - target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" - target triple = "aarch64--" - - define void @test_fptosi_s32_s32() { ret void } - define void @test_fptoui_s32_s32() { ret void } - define void @test_fptosi_s32_s64() { ret void } - define void @test_fptoui_s32_s64() { ret void } - - define void @test_fptosi_s64_s32() { ret void } - define void @test_fptoui_s64_s32() { ret void } - define void @test_fptosi_s64_s64() { ret void } - define void @test_fptoui_s64_s64() { ret void } - - define void @test_fptosi_s1_s32() { ret void } - define void @test_fptoui_s1_s32() { ret void } - - define void @test_fptosi_s8_s64() { ret void } - define void @test_fptoui_s8_s64() { ret void } - - define void @test_fptosi_s16_s32() { ret void } - define void @test_fptoui_s16_s32() { ret void } - - define void @test_fptoui_v4s32() { ret void } - define void @test_fptosi_v4s32() { ret void } -... +# RUN: llc -mtriple=aarch64-- -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_fptosi_s32_s32 @@ -265,3 +238,31 @@ %1:_(<4 x s32>) = G_FPTOSI %0 $q0 = COPY %1 ... 
+ +--- +name: test_fptoui_s128_s32 +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: test_fptoui_s128_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s128) = G_FPTOUI [[COPY]](s32) + ; CHECK: $q0 = COPY [[FPTOUI]](s128) + %0:_(s32) = COPY $w0 + %1:_(s128) = G_FPTOUI %0 + $q0 = COPY %1 +... + +--- +name: test_fptosi_s128_s32 +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: test_fptosi_s128_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s128) = G_FPTOSI [[COPY]](s32) + ; CHECK: $q0 = COPY [[FPTOSI]](s128) + %0:_(s32) = COPY $w0 + %1:_(s128) = G_FPTOSI %0 + $q0 = COPY %1 +... diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir 2021-07-09 07:09:48.000000000 +0000 @@ -562,8 +562,8 @@ ; CHECK: liveins: $q0, $q1 ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK: [[NEGv8i16_:%[0-9]+]]:fpr128 = NEGv8i16 [[COPY1]] - ; CHECK: [[USHLv16i8_:%[0-9]+]]:fpr128 = USHLv16i8 [[COPY]], [[NEGv8i16_]] + ; CHECK: [[NEGv16i8_:%[0-9]+]]:fpr128 = NEGv16i8 [[COPY1]] + ; CHECK: [[USHLv16i8_:%[0-9]+]]:fpr128 = USHLv16i8 [[COPY]], [[NEGv16i8_]] ; CHECK: $q0 = COPY [[USHLv16i8_]] ; CHECK: RET_ReallyLR implicit $q0 %0:fpr(<16 x s8>) = COPY $q0 diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/stack-protector-musttail.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/stack-protector-musttail.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/AArch64/stack-protector-musttail.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/AArch64/stack-protector-musttail.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,66 @@ +; RUN: llc -mtriple=arm64-apple-macosx -fast-isel %s -o - -start-before=stack-protector -stop-after=stack-protector | FileCheck %s + +@var = global [2 x i64]* null + +declare void @callee() + +define void @caller1() ssp { +; CHECK-LABEL: define void @caller1() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: [[GUARD:%.*]] = call i8* @llvm.stackguard() +; CHECK: [[TOKEN:%.*]] = load volatile i8*, i8** {{%.*}} +; CHECK: [[TST:%.*]] = icmp eq i8* [[GUARD]], [[TOKEN]] +; CHECK: br i1 [[TST]] + +; CHECK: musttail call void @callee() +; CHECK-NEXT: ret void + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + musttail call void @callee() + ret void +} + +define void @justret() ssp { +; CHECK-LABEL: define void @justret() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: [[GUARD:%.*]] = call i8* @llvm.stackguard() +; CHECK: [[TOKEN:%.*]] = load volatile i8*, i8** {{%.*}} +; CHECK: [[TST:%.*]] = icmp eq i8* [[GUARD]], [[TOKEN]] +; CHECK: br i1 [[TST]] + +; CHECK: ret void + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + br label %retblock + +retblock: + ret void +} + + +declare i64* @callee2() + +define i8* @caller2() ssp { +; CHECK-LABEL: define i8* @caller2() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: [[GUARD:%.*]] = call i8* @llvm.stackguard() +; CHECK: [[TOKEN:%.*]] = load volatile i8*, i8** {{%.*}} +; CHECK: [[TST:%.*]] = icmp eq i8* [[GUARD]], [[TOKEN]] +; CHECK: br i1 [[TST]] + +; CHECK: [[TMP:%.*]] = musttail call i64* @callee2() +; CHECK-NEXT: 
[[RES:%.*]] = bitcast i64* [[TMP]] to i8* +; CHECK-NEXT: ret i8* [[RES]] + + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + %tmp = musttail call i64* @callee2() + %res = bitcast i64* %tmp to i8* + ret i8* %res +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir llvm-toolchain-12-12.0.1/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir 2021-07-09 07:05:01.000000000 +0000 @@ -81,23 +81,23 @@ ;CHECK-LABEL: name: CheckAddrModeT2_i8s4 ;CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[I8S4:[0-9]+]] - ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 254, 14 /* CC::al */, $noreg + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1020, 14 /* CC::al */, $noreg $r0 = tMOVr $r1, 14, $noreg tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg t2STRDi8 $r0, $r1, $sp, 8, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 253, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 254, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1012, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1020, 14, $noreg tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg t2STRDi8 $r0, $r1, $sp, 8, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 253, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 254, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1012, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1020, 14, $noreg tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg t2STRDi8 $r0, $r1, $sp, 8, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 253, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 254, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1012, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1020, 14, $noreg BX_RET 14, $noreg ... 
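[Annotation, not part of the patch] A note on the new constants in this test: the block exercises the Thumb-2 i8s4 addressing mode (per the CheckAddrModeT2_i8s4 label), where t2STRDi8 takes an 8-bit immediate scaled by 4, so the byte offset must be a multiple of 4 and at most 255 * 4 = 1020. Under that reading the old offsets 253 and 254 were not encodable, which appears to be why they were replaced by 1012 and 1020. The next hunk shows the outlined body using offsets 8/16/1020, i.e. the original 0/8/1012 plus the 8 bytes of the spilled LR rather than the previous +2 (scaled-units) adjustment, while the store at 1020 stays in the caller, consistent with 1020 + 8 no longer fitting the encoding.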
--- @@ -205,9 +205,9 @@ ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp - ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 2, 14 /* CC::al */, $noreg - ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 10, 14 /* CC::al */, $noreg - ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 255, 14 /* CC::al */, $noreg + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 16, 14 /* CC::al */, $noreg + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1020, 14 /* CC::al */, $noreg ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I12]] diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/ARM/Windows/stack-protector-musttail.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/ARM/Windows/stack-protector-musttail.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/ARM/Windows/stack-protector-musttail.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/ARM/Windows/stack-protector-musttail.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=thumbv7-windows-msvc -fast-isel %s -o - -start-before=stack-protector -stop-after=stack-protector | FileCheck %s + +@var = global [2 x i64]* null + +declare void @callee() + +define void @caller1() sspreq { +; CHECK-LABEL: define void @caller1() +; Prologue: + +; CHECK: call void @__security_check_cookie + +; CHECK: musttail call void @callee() +; CHECK-NEXT: ret void + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + musttail call void @callee() + ret void +} + +define void @justret() sspreq { +; CHECK-LABEL: define void @justret() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: call void @__security_check_cookie + +; CHECK: ret void + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + br label %retblock + +retblock: + ret void +} + + +declare i64* @callee2() + +define i8* @caller2() sspreq { +; CHECK-LABEL: define i8* @caller2() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: call void @__security_check_cookie + +; CHECK: [[TMP:%.*]] = musttail call i64* @callee2() +; CHECK-NEXT: [[RES:%.*]] = bitcast i64* [[TMP]] to i8* +; CHECK-NEXT: ret i8* [[RES]] + + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + %tmp = musttail call i64* @callee2() + %res = bitcast i64* %tmp to i8* + ret i8* %res +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id-2.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id-2.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id-2.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id-2.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,73 @@ +; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o %t1 %s +; RUN: llc -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK %s +; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK %s +; Source code: +; struct s { +; int a; +; }; +; int test(void) { +; return __builtin_btf_type_id(*(const struct s *)0, 1); +; } +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm -Xclang -disable-llvm-passes test.c + +; Function Attrs: nounwind +define dso_local i32 @test() #0 !dbg !7 { +entry: + %0 = call i64 @llvm.bpf.btf.type.id(i32 0, i64 1), !dbg !11, !llvm.preserve.access.index !12 + %conv = trunc i64 %0 to i32, !dbg !11 + ret i32 %conv, !dbg 
!16 +} + +; CHECK: .long 1 # BTF_KIND_INT(id = 2) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 0x1000020 + +; CHECK: .long 16 # BTF_KIND_STRUCT(id = 4) +; CHECK-NEXT: .long 67108865 # 0x4000001 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 18 +; CHECK-NEXT: .long 2 + +; CHECK: .ascii "int" # string offset=1 +; CHECK: .ascii ".text" # string offset=10 +; CHECK: .byte 115 # string offset=16 +; CHECK: .byte 97 # string offset=18 +; CHECK: .byte 48 # string offset=20 + +; CHECK: .long 16 # FieldReloc +; CHECK-NEXT: .long 10 # Field reloc section string offset=10 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 20 +; CHECK-NEXT: .long 7 + +; Function Attrs: nounwind readnone +declare i64 @llvm.bpf.btf.type.id(i32, i64) #1 + +attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 9783e2098800b954c55ae598a1ce5c4b93444fc0)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/bpf/test") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 9783e2098800b954c55ae598a1ce5c4b93444fc0)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{!10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DILocation(line: 5, column: 10, scope: !7) +!12 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !13) +!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 1, size: 32, elements: !14) +!14 = !{!15} +!15 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !13, file: !1, line: 2, baseType: !10, size: 32) +!16 = !DILocation(line: 5, column: 3, scope: !7) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/double.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/double.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/double.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/double.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,58 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s + +; Source code: +; double a; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t.c + +@a = dso_local local_unnamed_addr global double 0.000000e+00, align 8, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +; CHECK: .section .BTF,"",@progbits +; CHECK-NEXT: .short 60319 # 0xeb9f +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .long 24 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 52 +; CHECK-NEXT: .long 52 +; CHECK-NEXT: .long 15 +; [1] double, size=8 bytes (64 bits) +; CHECK-NEXT: .long 1 # BTF_KIND_FLOAT(id = 1) +; 
CHECK-NEXT: .long 268435456 # 0x10000000 +; CHECK-NEXT: .long 8 +; [2] a, type=double (1), global +; CHECK-NEXT: .long 8 # BTF_KIND_VAR(id = 2) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 1 +; [3] .bss, 1 var, {a, offset=&a, size=8 bytes} +; CHECK-NEXT: .long 10 # BTF_KIND_DATASEC(id = 3) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long a +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 0 # string offset=0 +; CHECK-NEXT: .ascii "double" # string offset=1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 97 # string offset=8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .ascii ".bss" # string offset=10 +; CHECK-NEXT: .byte 0 + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "a", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t.c", directory: "/home/yhs/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 11.0.0 "} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll 2021-07-09 07:05:01.000000000 +0000 @@ -23,9 +23,9 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .long 24 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 88 -; CHECK-NEXT: .long 88 -; CHECK-NEXT: .long 72 +; CHECK-NEXT: .long 112 +; CHECK-NEXT: .long 112 +; CHECK-NEXT: .long 76 ; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) ; CHECK-NEXT: .long 218103808 # 0xd000000 ; CHECK-NEXT: .long 2 @@ -48,6 +48,12 @@ ; CHECK-NEXT: .long 60 # BTF_KIND_FUNC(id = 6) ; CHECK-NEXT: .long 201326594 # 0xc000002 ; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 72 # BTF_KIND_DATASEC(id = 7) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long global_func +; CHECK-NEXT: .long 0 ; CHECK-NEXT: .byte 0 # string offset=0 ; CHECK-NEXT: .ascii "int" # string offset=1 ; CHECK-NEXT: .byte 0 @@ -61,6 +67,7 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .ascii "global_func" # string offset=60 ; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .ascii "abc" # string offset=72 attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" } diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/extern-var-section.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/extern-var-section.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/extern-var-section.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/extern-var-section.ll 2021-07-09 07:05:01.000000000 +0000 @@ -28,8 +28,8 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .long 24 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 128 -; CHECK-NEXT: .long 128 +; CHECK-NEXT: .long 140 +; CHECK-NEXT: .long 140 ; CHECK-NEXT: .long 79 ; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) ; CHECK-NEXT: .long 218103808 # 0xd000000 @@ -58,7 +58,10 @@ ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 75 # BTF_KIND_DATASEC(id = 8) -; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 251658242 # 0xf000002 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long global_func ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 7 ; CHECK-NEXT: .long ch diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll 2021-07-09 07:05:01.000000000 +0000 @@ -28,8 +28,8 @@ ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .long 24 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 128 -; CHECK-NEXT: .long 128 +; CHECK-NEXT: .long 140 +; CHECK-NEXT: .long 140 ; CHECK-NEXT: .long 79 ; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) ; CHECK-NEXT: .long 218103808 # 0xd000000 @@ -58,7 +58,10 @@ ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 75 # BTF_KIND_DATASEC(id = 8) -; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 251658242 # 0xf000002 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long global_func ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 7 ; CHECK-NEXT: .long ch diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/float.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/float.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/float.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/float.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,58 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s + +; Source code: +; float a; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t.c + +@a = dso_local local_unnamed_addr global float 0.000000e+00, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +; CHECK: .section .BTF,"",@progbits +; CHECK-NEXT: .short 60319 # 0xeb9f +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .long 24 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 52 +; CHECK-NEXT: .long 52 +; CHECK-NEXT: .long 14 +; [1] float, size=4 bytes (32 bits) +; CHECK-NEXT: .long 1 # BTF_KIND_FLOAT(id = 1) +; CHECK-NEXT: .long 268435456 # 0x10000000 +; CHECK-NEXT: .long 4 +; [2] a, type=float (1), global +; CHECK-NEXT: .long 7 # BTF_KIND_VAR(id = 2) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 1 +; [3] .bss, 1 var, {a, offset=&a, size=4 bytes} +; CHECK-NEXT: .long 9 # BTF_KIND_DATASEC(id = 3) +; CHECK-NEXT: 
.long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long a +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 0 # string offset=0 +; CHECK-NEXT: .ascii "float" # string offset=1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 97 # string offset=7 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .ascii ".bss" # string offset=9 +; CHECK-NEXT: .byte 0 + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "a", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t.c", directory: "/home/yhs/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 11.0.0 "} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/weak-global-3.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/weak-global-3.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/BTF/weak-global-3.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/BTF/weak-global-3.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,86 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source code: +; const volatile char g __attribute__((weak)) = 2; +; int test() { +; return g; +; } +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm test.c + +@g = weak_odr dso_local constant i8 2, align 1, !dbg !0 + +; Function Attrs: nofree norecurse nounwind willreturn +define dso_local i32 @test() local_unnamed_addr #0 !dbg !13 { +entry: + %0 = load volatile i8, i8* @g, align 1, !dbg !17, !tbaa !18 + %conv = sext i8 %0 to i32, !dbg !17 + ret i32 %conv, !dbg !21 +} + +; CHECK: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) +; CHECK-NEXT: .long 218103808 # 0xd000000 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 1 # BTF_KIND_INT(id = 2) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 0x1000020 +; CHECK-NEXT: .long 5 # BTF_KIND_FUNC(id = 3) +; CHECK-NEXT: .long 201326593 # 0xc000001 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 0 # BTF_KIND_CONST(id = 4) +; CHECK-NEXT: .long 167772160 # 0xa000000 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 0 # BTF_KIND_VOLATILE(id = 5) +; CHECK-NEXT: .long 150994944 # 0x9000000 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long 47 # BTF_KIND_INT(id = 6) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 16777224 # 0x1000008 +; CHECK-NEXT: .long 52 # BTF_KIND_VAR(id = 7) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 54 # BTF_KIND_DATASEC(id = 8) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long g +; CHECK-NEXT: .long 1 + +; CHECK: .ascii "int" # string offset=1 +; CHECK: .ascii "test" # string offset=5 +; CHECK: .ascii "char" # string offset=47 +; CHECK: .byte 103 # string offset=52 +; CHECK: .ascii ".rodata" # string offset=54 + +attributes #0 = { nofree norecurse nounwind willreturn "frame-pointer"="all" "min-legal-vector-width"="0" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!9, !10, !11} +!llvm.ident = !{!12} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 9cc417cbca1cece0d55fa3d1e15682943a06139e)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/btf/tests") +!4 = !{} +!5 = !{!0} +!6 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !7) +!7 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !8) +!8 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!9 = !{i32 7, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{i32 1, !"wchar_size", i32 4} +!12 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 9cc417cbca1cece0d55fa3d1e15682943a06139e)"} +!13 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 2, type: !14, scopeLine: 2, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4) +!14 = !DISubroutineType(types: !15) +!15 = !{!16} +!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!17 = !DILocation(line: 3, column: 10, scope: !13) +!18 = !{!19, !19, i64 0} +!19 = !{!"omnipotent char", !20, i64 0} +!20 = !{!"Simple C/C++ TBAA"} +!21 = !DILocation(line: 3, column: 3, scope: !13) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/remove_truncate_8.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/remove_truncate_8.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/BPF/remove_truncate_8.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/BPF/remove_truncate_8.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=bpf -verify-machineinstrs | FileCheck %s +; Source Code: +; struct loc_prog { +; unsigned int ip; +; int len; +; }; +; int exec_prog(struct loc_prog *prog) { +; if (prog->ip < prog->len) { +; int x = prog->ip; +; if (x < 3) +; prog->ip += 2; +; } +; return 3; +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm t.c + +%struct.loc_prog = type { i32, i32 } + +; Function Attrs: nofree norecurse nounwind willreturn +define dso_local i32 @exec_prog(%struct.loc_prog* nocapture %prog) local_unnamed_addr { +entry: + %ip = getelementptr inbounds %struct.loc_prog, %struct.loc_prog* %prog, i64 0, i32 0 + %0 = load i32, i32* %ip, align 4 + %len = getelementptr inbounds %struct.loc_prog, %struct.loc_prog* %prog, i64 0, i32 1 + %1 = load i32, i32* %len, align 4 + %cmp = icmp ult i32 %0, %1 + %cmp2 = icmp slt i32 %0, 3 + %or.cond = and i1 %cmp2, %cmp +; CHECK: r{{[0-9]+}} <<= 32 +; CHECK: r{{[0-9]+}} s>>= 32 + br i1 %or.cond, label %if.then3, label %if.end5 + +if.then3: ; preds = %entry + %add = add nsw i32 %0, 2 + store i32 %add, i32* %ip, align 4 + br label %if.end5 + +if.end5: ; preds = %if.then3, %entry + ret i32 3 +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/arg_promotion.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/arg_promotion.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/arg_promotion.ll 1970-01-01 00:00:00.000000000 +0000 +++ 
llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/arg_promotion.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -argpromotion -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; Test to check that we do not promote arguments when the +; type size is greater than 128 bits. + +define internal fastcc void @print_acc(<512 x i1>* nocapture readonly %a) nounwind { +; CHECK-LABEL: @print_acc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, <512 x i1>* [[A:%.*]], align 64 +; CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 0 +; CHECK-NEXT: ret void +; +entry: + %0 = load <512 x i1>, <512 x i1>* %a, align 64 + %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0) + %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 + ret void +} + +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) nounwind + +define dso_local void @test(<512 x i1>* nocapture %a, <16 x i8> %ac) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[AC:%.*]], <16 x i8> [[AC]]) +; CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[A:%.*]], align 64 +; CHECK-NEXT: tail call fastcc void @print_acc(<512 x i1>* nonnull [[A]]) +; CHECK-NEXT: ret void +; +entry: + %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %ac, <16 x i8> %ac) + store <512 x i1> %0, <512 x i1>* %a, align 64 + tail call fastcc void @print_acc(<512 x i1>* nonnull %a) + ret void +} + +declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>) nounwind + +@.str = private unnamed_addr constant [11 x i8] c"Vector: { \00", align 1 +@.str.1 = private unnamed_addr constant [5 x i8] c"%d, \00", align 1 +@.str.2 = private unnamed_addr constant [6 x i8] c"%d }\0A\00", align 1 + +define internal fastcc void @printWideVec(<16 x i32> %ptr.val) nounwind { +; CHECK-LABEL: @printWideVec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i32> [[PTR_VAL:%.*]], i32 0 +; CHECK-NEXT: [[CALL1:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT]]) +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 1 +; CHECK-NEXT: [[CALL1_1:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_1]]) +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 2 +; CHECK-NEXT: [[CALL1_2:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_2]]) +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 3 +; CHECK-NEXT: [[CALL1_3:%.*]] = tail call signext i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_3]]) +; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 4 +; CHECK-NEXT: [[CALL1_4:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_4]]) +; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 5 +; CHECK-NEXT: [[CALL1_5:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_5]]) +; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 6 +; CHECK-NEXT: [[CALL1_6:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_6]]) +; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 7 +; CHECK-NEXT: [[CALL3:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([6 x i8], [6 x i8]* @.str.2, i64 0, i64 0), i32 signext [[VECEXT2]]) +; CHECK-NEXT: ret void +; +entry: + %call = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) + %vecext = extractelement <16 x i32> %ptr.val, i32 0 + %call1 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext) + %vecext.1 = extractelement <16 x i32> %ptr.val, i32 1 + %call1.1 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.1) + %vecext.2 = extractelement <16 x i32> %ptr.val, i32 2 + %call1.2 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.2) + %vecext.3 = extractelement <16 x i32> %ptr.val, i32 3 + %call1.3 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.3) + %vecext.4 = extractelement <16 x i32> %ptr.val, i32 4 + %call1.4 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.4) + %vecext.5 = extractelement <16 x i32> %ptr.val, i32 5 + %call1.5 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.5) + %vecext.6 = extractelement <16 x i32> %ptr.val, i32 6 + %call1.6 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.6) + %vecext2 = extractelement <16 x i32> %ptr.val, i32 7 + %call3 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([6 x i8], [6 x i8]* @.str.2, i64 0, i64 0), i32 signext %vecext2) + ret void +} + +declare noundef signext i32 @printf(i8* nocapture noundef readonly, ...) 
nounwind + +define dso_local void @test1(<4 x i32> %a, <4 x i32> %b) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> , <16 x i32> +; CHECK-NEXT: [[VECINIT22:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> +; CHECK-NEXT: tail call fastcc void @printWideVec(<16 x i32> [[VECINIT22]]) +; CHECK-NEXT: ret void +; +entry: + %0 = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> + %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <16 x i32> + %2 = shufflevector <16 x i32> %0, <16 x i32> , <16 x i32> + %vecinit22 = shufflevector <16 x i32> %2, <16 x i32> %1, <16 x i32> + tail call fastcc void @printWideVec(<16 x i32> %vecinit22) + ret void +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/asm-template-I.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/asm-template-I.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/asm-template-I.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/asm-template-I.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-- | FileCheck %s +; https://bugs.llvm.org/show_bug.cgi?id=50608 + +define dso_local signext i32 @main(i32 signext %argc, i8** %argv) { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stw 3, -4(1) +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: addi 4, 1, -4 +; CHECK-NEXT: #APP +; CHECK-NEXT: .ascii "-1@0(4)" +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr +entry: + call void asm sideeffect " .asciz \22${0:n}@${1:I}$1\22 ", "n,nZr"(i32 1, i32 %argc) + ret i32 0 +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll 2021-07-09 07:09:48.000000000 +0000 @@ -0,0 +1,26 @@ +; RUN: llc -mcpu=pwr7 -mattr=-altivec -verify-machineinstrs \ +; RUN: -mtriple=powerpc-unknown-aix < %s | FileCheck %s + +; RUN: llc -mcpu=pwr7 -mattr=-altivec -verify-machineinstrs \ +; RUN: -mtriple=powerpc64-unknown-aix < %s | FileCheck %s + + +define dso_local double @test_double(double %a, double %b) { +entry: + %0 = tail call double asm "fadd. $0,$1,$2\0A", "={f31},d,d,0"(double %a, double %b, double 0.000000e+00) + ret double %0 +} + +; CHECK-LABEL: test_double +; CHECK: #APP +; CHECK-NEXT: fadd. 31,1,2 + +define dso_local signext i32 @test_int(double %a, double %b) { +entry: + %0 = tail call i32 asm "fadd. $0,$1,$2\0A", "={f0},d,d,0"(double %a, double %b, i32 0) + ret i32 %0 +} + +; CHECK-LABEL: test_int +; CHECK: #APP +; CHECK-NEXT: fadd. 
0,1,2 diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,23 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %s + +define i32 @test_f32(float %x) { +; CHECK-LABEL: test_f32: +; CHECK: #APP +; CHECK-NEXT: efsctsi 31, 3 +; CHECK-NEXT: #NO_APP +entry: + %0 = call i32 asm sideeffect "efsctsi $0, $1", "={f31},f"(float %x) + ret i32 %0 +} + +define i32 @test_f64(double %x) { +; CHECK-LABEL: test_f64: +; CHECK: #APP +; CHECK-NEXT: efdctsi 0, 3 +; CHECK-NEXT: #NO_APP +entry: + %0 = call i32 asm sideeffect "efdctsi $0, $1", "={f0},d"(double %x) + ret i32 %0 +} + diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le-unknown-linux-unknown -verify-machineinstrs %s \ +; RUN: -ppc-asm-full-reg-names -o - | FileCheck %s --check-prefix=PPC64LE +; RUN: llc -mtriple=powerpc64-unknown-linux-unknown -verify-machineinstrs %s \ +; RUN: -ppc-asm-full-reg-names -o - | FileCheck %s --check-prefix=PPC64BE + +define dso_local void @ClobberLR() local_unnamed_addr #0 { +; PPC64LE-LABEL: ClobberLR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -32(r1) +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: addi r1, r1, 32 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberLR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: addi r1, r1, 48 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: blr +entry: + tail call void asm sideeffect "", "~{lr}"() + ret void +} + +define dso_local void @ClobberR5() local_unnamed_addr #0 { +; PPC64LE-LABEL: ClobberR5: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberR5: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: blr +entry: + tail call void asm sideeffect "", "~{r5}"() + ret void +} + +define dso_local void @ClobberR15() local_unnamed_addr #0 { +; PPC64LE-LABEL: ClobberR15: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberR15: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; PPC64BE-NEXT: blr +entry: + tail call void asm sideeffect "", "~{r15}"() + 
ret void +} + +;; Test for INLINEASM_BR +define dso_local signext i32 @ClobberLR_BR(i32 signext %in) #0 { +; PPC64LE-LABEL: ClobberLR_BR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: # %bb.1: # %return +; PPC64LE-NEXT: extsw r3, r3 +; PPC64LE-NEXT: blr +; PPC64LE-NEXT: .Ltmp0: # Block address taken +; PPC64LE-NEXT: .LBB3_2: # %return_early +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -32(r1) +; PPC64LE-NEXT: li r3, 0 +; PPC64LE-NEXT: addi r1, r1, 32 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: extsw r3, r3 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberLR_BR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: nop +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: # %bb.1: # %return +; PPC64BE-NEXT: extsw r3, r3 +; PPC64BE-NEXT: blr +; PPC64BE-NEXT: .Ltmp0: # Block address taken +; PPC64BE-NEXT: .LBB3_2: # %return_early +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) +; PPC64BE-NEXT: li r3, 0 +; PPC64BE-NEXT: addi r1, r1, 48 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: extsw r3, r3 +; PPC64BE-NEXT: blr +entry: + callbr void asm sideeffect "nop", "X,~{lr}"(i8* blockaddress(@ClobberLR_BR, %return_early)) + to label %return [label %return_early] + +return_early: + br label %return + +return: + %retval.0 = phi i32 [ 0, %return_early ], [ %in, %entry ] + ret i32 %retval.0 +} + +define dso_local signext i32 @ClobberR5_BR(i32 signext %in) #0 { +; PPC64LE-LABEL: ClobberR5_BR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: # %bb.1: # %return +; PPC64LE-NEXT: extsw r3, r3 +; PPC64LE-NEXT: blr +; PPC64LE-NEXT: .Ltmp1: # Block address taken +; PPC64LE-NEXT: .LBB4_2: # %return_early +; PPC64LE-NEXT: li r3, 0 +; PPC64LE-NEXT: extsw r3, r3 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberR5_BR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: nop +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: # %bb.1: # %return +; PPC64BE-NEXT: extsw r3, r3 +; PPC64BE-NEXT: blr +; PPC64BE-NEXT: .Ltmp1: # Block address taken +; PPC64BE-NEXT: .LBB4_2: # %return_early +; PPC64BE-NEXT: li r3, 0 +; PPC64BE-NEXT: extsw r3, r3 +; PPC64BE-NEXT: blr +entry: + callbr void asm sideeffect "nop", "X,~{r5}"(i8* blockaddress(@ClobberR5_BR, %return_early)) + to label %return [label %return_early] + +return_early: + br label %return + +return: + %retval.0 = phi i32 [ 0, %return_early ], [ %in, %entry ] + ret i32 %retval.0 +} + + + +define dso_local void @DefLR() local_unnamed_addr #0 { +; PPC64LE-LABEL: DefLR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -32(r1) +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: addi r1, r1, 32 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: DefLR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: addi r1, r1, 48 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: blr +entry: + tail call i64 asm sideeffect "", "={lr}"() + ret void +} + +define dso_local void @EarlyClobberLR() local_unnamed_addr #0 { +; PPC64LE-LABEL: EarlyClobberLR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; 
PPC64LE-NEXT: stdu r1, -32(r1) +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: addi r1, r1, 32 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: EarlyClobberLR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: addi r1, r1, 48 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: blr +entry: + tail call i64 asm sideeffect "", "=&{lr}"() + ret void +} + +define dso_local void @ClobberMulti() local_unnamed_addr #0 { +; PPC64LE-LABEL: ClobberMulti: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; PPC64LE-NEXT: std r16, -128(r1) # 8-byte Folded Spill +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -176(r1) +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: addi r1, r1, 176 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: ld r16, -128(r1) # 8-byte Folded Reload +; PPC64LE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberMulti: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -192(r1) +; PPC64BE-NEXT: std r15, 56(r1) # 8-byte Folded Spill +; PPC64BE-NEXT: std r16, 64(r1) # 8-byte Folded Spill +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: ld r16, 64(r1) # 8-byte Folded Reload +; PPC64BE-NEXT: ld r15, 56(r1) # 8-byte Folded Reload +; PPC64BE-NEXT: addi r1, r1, 192 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: blr +entry: + tail call void asm sideeffect "", "~{lr},~{r15},~{r16}"() + ret void +} + +attributes #0 = { nounwind } diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/pr46759.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/pr46759.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/pr46759.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/pr46759.ll 2021-07-09 07:05:01.000000000 +0000 @@ -6,32 +6,26 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-LABEL: foo: ; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: clrldi r12, r1, 53 ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB0_2 -; CHECK-LE-NEXT: # %bb.1: # %entry -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: .LBB0_2: # %entry -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB0_4 -; CHECK-LE-NEXT: .LBB0_3: # %entry +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -6144 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB0_2 +; CHECK-LE-NEXT: .LBB0_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB0_3 -; CHECK-LE-NEXT: .LBB0_4: # %entry -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -2048(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; 
CHECK-LE-NEXT: blt cr0, .LBB0_1 +; CHECK-LE-NEXT: .LBB0_2: # %entry +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 @@ -52,13 +46,13 @@ ; CHECK-LE-NEXT: add r4, r1, r4 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB0_6 -; CHECK-LE-NEXT: .LBB0_5: # %entry +; CHECK-LE-NEXT: beq cr0, .LBB0_4 +; CHECK-LE-NEXT: .LBB0_3: # %entry ; CHECK-LE-NEXT: # ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB0_5 -; CHECK-LE-NEXT: .LBB0_6: # %entry +; CHECK-LE-NEXT: bne cr0, .LBB0_3 +; CHECK-LE-NEXT: .LBB0_4: # %entry ; CHECK-LE-NEXT: addi r3, r1, 2048 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll 2021-07-09 07:05:01.000000000 +0000 @@ -44,13 +44,13 @@ define i8 @f1() #0 "stack-probe-size"="0" { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: li r0, 259 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: li r12, 259 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB1_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -16(r1) +; CHECK-LE-NEXT: stdu r0, -16(r1) ; CHECK-LE-NEXT: bdnz .LBB1_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -63,13 +63,13 @@ ; ; CHECK-BE-LABEL: f1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: li r0, 260 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: li r12, 260 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB1_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -16(r1) +; CHECK-BE-NEXT: stdu r0, -16(r1) ; CHECK-BE-NEXT: bdnz .LBB1_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -82,17 +82,17 @@ ; ; CHECK-32-LABEL: f1: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: li r0, 257 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: li r12, 257 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB1_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: stwu r0, -16(r1) ; CHECK-32-NEXT: bdnz .LBB1_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 4112 ; CHECK-32-NEXT: li r3, 3 @@ -111,14 +111,14 @@ define i8 @f2() #0 { ; CHECK-LE-LABEL: f2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: 
.LBB2_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -131,14 +131,14 @@ ; ; CHECK-BE-LABEL: f2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB2_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -151,18 +151,18 @@ ; ; CHECK-32-LABEL: f2: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB2_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB2_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 ; CHECK-32-NEXT: li r3, 3 @@ -184,11 +184,11 @@ define i8 @f3() #0 "stack-probe-size"="32768" { ; CHECK-LE-LABEL: f3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 ; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584 ; CHECK-LE-NEXT: li r3, 3 @@ -199,11 +199,11 @@ ; ; CHECK-BE-LABEL: f3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 ; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600 ; CHECK-BE-NEXT: li r3, 3 @@ -214,13 +214,13 @@ ; ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 ; CHECK-32-NEXT: li r3, 3 @@ -291,16 +291,16 @@ define i8 @f5() #0 "stack-probe-size"="65536" { ; CHECK-LE-LABEL: f5: ; CHECK-LE: # %bb.0: # %entry -; 
CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB5_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -313,16 +313,16 @@ ; ; CHECK-BE-LABEL: f5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB5_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -335,20 +335,20 @@ ; ; CHECK-32-LABEL: f5: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB5_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1048592 ; CHECK-32-NEXT: li r3, 3 @@ -370,15 +370,15 @@ define i8 @f6() #0 { ; CHECK-LE-LABEL: f6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: lis r0, 4 -; CHECK-LE-NEXT: nop -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: lis r12, 4 +; CHECK-LE-NEXT: ori r12, r12, 0 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB6_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -391,15 +391,15 @@ ; ; CHECK-BE-LABEL: f6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: lis r0, 4 -; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: lis r12, 4 +; CHECK-BE-NEXT: ori r12, r12, 0 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB6_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; 
CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -412,19 +412,19 @@ ; ; CHECK-32-LABEL: f6: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: lis r0, 4 -; CHECK-32-NEXT: nop -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: lis r12, 4 +; CHECK-32-NEXT: ori r12, r12, 0 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB6_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1073741840 ; CHECK-32-NEXT: li r3, 3 @@ -446,18 +446,18 @@ define i8 @f7() #0 "stack-probe-size"="65536" { ; CHECK-LE-LABEL: f7: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: ori r0, r0, 13776 -; CHECK-LE-NEXT: stdux r12, r1, r0 -; CHECK-LE-NEXT: li r0, 15258 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 13776 +; CHECK-LE-NEXT: stdux r0, r1, r12 +; CHECK-LE-NEXT: li r12, 15258 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB7_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -470,18 +470,18 @@ ; ; CHECK-BE-LABEL: f7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: ori r0, r0, 13760 -; CHECK-BE-NEXT: stdux r12, r1, r0 -; CHECK-BE-NEXT: li r0, 15258 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 13760 +; CHECK-BE-NEXT: stdux r0, r1, r12 +; CHECK-BE-NEXT: li r12, 15258 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB7_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -494,22 +494,22 @@ ; ; CHECK-32-LABEL: f7: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: ori r0, r0, 13808 -; CHECK-32-NEXT: stwux r12, r1, r0 -; CHECK-32-NEXT: li r0, 15258 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 13808 +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: li r12, 15258 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB7_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: 
stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1000000016 ; CHECK-32-NEXT: li r3, 3 @@ -599,31 +599,24 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-LABEL: f9: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 53 ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB9_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -10240 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB9_2 +; CHECK-LE-NEXT: .LBB9_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB9_1 ; CHECK-LE-NEXT: .LBB9_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB9_4 -; CHECK-LE-NEXT: .LBB9_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB9_3 -; CHECK-LE-NEXT: .LBB9_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -2048(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: addi r4, r1, 2048 @@ -637,31 +630,24 @@ ; ; CHECK-BE-LABEL: f9: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 53 ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 53 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB9_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; CHECK-BE-NEXT: li r12, -10240 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB9_2 +; CHECK-BE-NEXT: .LBB9_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB9_1 ; CHECK-BE-NEXT: .LBB9_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB9_4 -; CHECK-BE-NEXT: .LBB9_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB9_3 -; CHECK-BE-NEXT: .LBB9_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: stdu r12, -2048(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: addi r4, r1, 2048 @@ -675,15 +661,23 @@ ; ; CHECK-32-LABEL: f9: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 
-; CHECK-32-NEXT: clrlwi r0, r12, 21 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: stwu r12, -2048(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 21 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r12, -10240 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB9_2 +; CHECK-32-NEXT: .LBB9_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB9_1 +; CHECK-32-NEXT: .LBB9_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -8 ; CHECK-32-NEXT: stwx r30, 0, r0 @@ -712,30 +706,24 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-LABEL: f10: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 54 ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 54 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB10_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -5120 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB10_2 +; CHECK-LE-NEXT: .LBB10_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB10_1 ; CHECK-LE-NEXT: .LBB10_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB10_4 -; CHECK-LE-NEXT: .LBB10_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB10_3 -; CHECK-LE-NEXT: .LBB10_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -1024(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: addi r4, r1, 1024 @@ -749,30 +737,24 @@ ; ; CHECK-BE-LABEL: f10: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 54 ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 54 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB10_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; CHECK-BE-NEXT: li r12, -5120 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB10_2 +; CHECK-BE-NEXT: .LBB10_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB10_1 ; CHECK-BE-NEXT: .LBB10_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB10_4 -; CHECK-BE-NEXT: .LBB10_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: 
bne cr0, .LBB10_3 -; CHECK-BE-NEXT: .LBB10_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: stdu r12, -1024(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: addi r4, r1, 1024 @@ -786,14 +768,23 @@ ; ; CHECK-32-LABEL: f10: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 22 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: stwu r12, -1024(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 22 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r12, -5120 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB10_2 +; CHECK-32-NEXT: .LBB10_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB10_1 +; CHECK-32-NEXT: .LBB10_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -8 ; CHECK-32-NEXT: stwx r30, 0, r0 @@ -821,35 +812,26 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-LE-LABEL: f11: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 49 ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 49 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB11_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: lis r12, -2 +; CHECK-LE-NEXT: ori r12, r12, 32768 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB11_2 +; CHECK-LE-NEXT: .LBB11_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB11_1 ; CHECK-LE-NEXT: .LBB11_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB11_4 -; CHECK-LE-NEXT: .LBB11_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB11_3 -; CHECK-LE-NEXT: .LBB11_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: li r0, 24 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: .LBB11_5: -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: bdnz .LBB11_5 -; CHECK-LE-NEXT: # %bb.6: -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 @@ -876,12 +858,12 @@ ; CHECK-LE-NEXT: add r4, r1, r7 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB11_8 -; CHECK-LE-NEXT: .LBB11_7: +; CHECK-LE-NEXT: beq cr0, .LBB11_4 +; CHECK-LE-NEXT: .LBB11_3: ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB11_7 -; CHECK-LE-NEXT: .LBB11_8: +; CHECK-LE-NEXT: bne 
cr0, .LBB11_3 +; CHECK-LE-NEXT: .LBB11_4: ; CHECK-LE-NEXT: addi r3, r1, -32768 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 @@ -891,35 +873,26 @@ ; ; CHECK-BE-LABEL: f11: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 49 ; CHECK-BE-NEXT: std r31, -8(r1) ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 49 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB11_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; CHECK-BE-NEXT: lis r12, -2 +; CHECK-BE-NEXT: ori r12, r12, 32768 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB11_2 +; CHECK-BE-NEXT: .LBB11_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB11_1 ; CHECK-BE-NEXT: .LBB11_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB11_4 -; CHECK-BE-NEXT: .LBB11_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB11_3 -; CHECK-BE-NEXT: .LBB11_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: li r0, 24 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: .LBB11_5: -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: bdnz .LBB11_5 -; CHECK-BE-NEXT: # %bb.6: -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r31, -8 ; CHECK-BE-NEXT: .cfi_offset r30, -16 @@ -946,12 +919,12 @@ ; CHECK-BE-NEXT: add r4, r1, r7 ; CHECK-BE-NEXT: stdux r3, r1, r5 ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: beq cr0, .LBB11_8 -; CHECK-BE-NEXT: .LBB11_7: +; CHECK-BE-NEXT: beq cr0, .LBB11_4 +; CHECK-BE-NEXT: .LBB11_3: ; CHECK-BE-NEXT: stdu r3, -4096(r1) ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: bne cr0, .LBB11_7 -; CHECK-BE-NEXT: .LBB11_8: +; CHECK-BE-NEXT: bne cr0, .LBB11_3 +; CHECK-BE-NEXT: .LBB11_4: ; CHECK-BE-NEXT: addi r3, r1, -32768 ; CHECK-BE-NEXT: lbz r3, 0(r3) ; CHECK-BE-NEXT: mr r1, r30 @@ -961,18 +934,24 @@ ; ; CHECK-32-LABEL: f11: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 17 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: li r0, 24 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: .LBB11_1: -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: bdnz .LBB11_1 -; CHECK-32-NEXT: # %bb.2: -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 17 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: lis r12, -2 +; CHECK-32-NEXT: ori r12, r12, 32768 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB11_2 +; CHECK-32-NEXT: .LBB11_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB11_1 +; CHECK-32-NEXT: .LBB11_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -4 ; CHECK-32-NEXT: stwx r31, 0, r0 diff -Nru 
llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll 2020-10-16 21:13:09.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll 2021-07-09 07:05:01.000000000 +0000 @@ -44,12 +44,12 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: li r0, 259 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: li r12, 259 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB1_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -16(r1) +; CHECK-LE-NEXT: stdu r0, -16(r1) ; CHECK-LE-NEXT: bdnz .LBB1_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -60,12 +60,12 @@ ; ; CHECK-BE-LABEL: f1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: li r0, 260 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: li r12, 260 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB1_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -16(r1) +; CHECK-BE-NEXT: stdu r0, -16(r1) ; CHECK-BE-NEXT: bdnz .LBB1_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -76,16 +76,16 @@ ; ; CHECK-32-LABEL: f1: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: li r0, 257 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: li r12, 257 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB1_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: stwu r0, -16(r1) ; CHECK-32-NEXT: bdnz .LBB1_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: li r3, 3 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: lbz r3, 16(r1) @@ -102,13 +102,13 @@ define i8 @f2() #0 nounwind { ; CHECK-LE-LABEL: f2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB2_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -119,13 +119,13 @@ ; ; CHECK-BE-LABEL: f2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB2_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -136,16 +136,16 @@ ; ; CHECK-32-LABEL: f2: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB2_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, 
-4096(r1) ; CHECK-32-NEXT: bdnz .LBB2_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -166,10 +166,10 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind { ; CHECK-LE-LABEL: f3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -178,10 +178,10 @@ ; ; CHECK-BE-LABEL: f3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -190,11 +190,11 @@ ; ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -261,15 +261,15 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-LABEL: f5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB5_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -280,15 +280,15 @@ ; ; CHECK-BE-LABEL: f5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB5_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -299,18 +299,18 @@ ; ; CHECK-32-LABEL: f5: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB5_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: 
stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -331,14 +331,14 @@ define i8 @f6() #0 nounwind { ; CHECK-LE-LABEL: f6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: lis r0, 4 -; CHECK-LE-NEXT: nop -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: lis r12, 4 +; CHECK-LE-NEXT: ori r12, r12, 0 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB6_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -349,14 +349,14 @@ ; ; CHECK-BE-LABEL: f6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: lis r0, 4 -; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: lis r12, 4 +; CHECK-BE-NEXT: ori r12, r12, 0 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB6_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -367,17 +367,17 @@ ; ; CHECK-32-LABEL: f6: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: lis r0, 4 -; CHECK-32-NEXT: nop -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: lis r12, 4 +; CHECK-32-NEXT: ori r12, r12, 0 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB6_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -398,17 +398,17 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-LABEL: f7: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: ori r0, r0, 13776 -; CHECK-LE-NEXT: stdux r12, r1, r0 -; CHECK-LE-NEXT: li r0, 15258 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: ori r12, r12, 13776 +; CHECK-LE-NEXT: stdux r0, r1, r12 +; CHECK-LE-NEXT: li r12, 15258 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB7_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -419,17 +419,17 @@ ; ; CHECK-BE-LABEL: f7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: ori r0, r0, 13760 -; CHECK-BE-NEXT: stdux r12, r1, r0 -; CHECK-BE-NEXT: li r0, 15258 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: ori r12, r12, 13760 +; CHECK-BE-NEXT: stdux r0, r1, r12 +; CHECK-BE-NEXT: li r12, 15258 +; CHECK-BE-NEXT: 
mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB7_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -440,20 +440,20 @@ ; ; CHECK-32-LABEL: f7: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: ori r0, r0, 13808 -; CHECK-32-NEXT: stwux r12, r1, r0 -; CHECK-32-NEXT: li r0, 15258 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: ori r12, r12, 13808 +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: li r12, 15258 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB7_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 9(r1) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll 2021-07-09 07:09:48.000000000 +0000 @@ -1504,9 +1504,11 @@ define @intrinsic_vmslt_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -15, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i8.i8( @@ -1537,9 +1539,11 @@ define @intrinsic_vmslt_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -13, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i8.i8( @@ -1570,9 +1574,11 @@ define @intrinsic_vmslt_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -11, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i8.i8( @@ -1603,9 +1609,11 @@ define @intrinsic_vmslt_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -9, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i8.i8( @@ -1636,9 +1644,11 @@ define @intrinsic_vmslt_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) 
nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv16i8.i8( @@ -1669,9 +1679,11 @@ define @intrinsic_vmslt_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv32i8.i8( @@ -1702,9 +1714,11 @@ define @intrinsic_vmslt_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -3, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i16.i16( @@ -1735,9 +1749,11 @@ define @intrinsic_vmslt_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -1, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i16.i16( @@ -1768,9 +1784,11 @@ define @intrinsic_vmslt_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i16.i16( @@ -1801,9 +1819,11 @@ define @intrinsic_vmslt_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i16.i16( @@ -1834,9 +1854,11 @@ define @intrinsic_vmslt_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv16i16.i16( @@ -1867,9 +1889,11 @@ define @intrinsic_vmslt_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 6, 
v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i32.i32( @@ -1900,9 +1924,11 @@ define @intrinsic_vmslt_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i32.i32( @@ -1933,9 +1959,11 @@ define @intrinsic_vmslt_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i32.i32( @@ -1966,9 +1994,11 @@ define @intrinsic_vmslt_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i32.i32( diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll 2021-07-09 07:09:48.000000000 +0000 @@ -1801,9 +1801,11 @@ define @intrinsic_vmslt_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -15, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i8.i8( @@ -1834,9 +1836,11 @@ define @intrinsic_vmslt_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -13, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i8.i8( @@ -1867,9 +1871,11 @@ define @intrinsic_vmslt_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -11, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i8.i8( @@ -1900,9 +1906,11 @@ define @intrinsic_vmslt_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: 
intrinsic_vmslt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -9, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i8.i8( @@ -1933,9 +1941,11 @@ define @intrinsic_vmslt_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv16i8.i8( @@ -1966,9 +1976,11 @@ define @intrinsic_vmslt_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv32i8.i8( @@ -1999,9 +2011,11 @@ define @intrinsic_vmslt_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -3, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i16.i16( @@ -2032,9 +2046,11 @@ define @intrinsic_vmslt_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -1, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i16.i16( @@ -2065,9 +2081,11 @@ define @intrinsic_vmslt_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i16.i16( @@ -2098,9 +2116,11 @@ define @intrinsic_vmslt_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i16.i16( @@ -2131,9 +2151,11 @@ define @intrinsic_vmslt_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 
+; CHECK-NEXT: vmsle.vi v25, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv16i16.i16( @@ -2164,9 +2186,11 @@ define @intrinsic_vmslt_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i32.i32( @@ -2197,9 +2221,11 @@ define @intrinsic_vmslt_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i32.i32( @@ -2230,9 +2256,11 @@ define @intrinsic_vmslt_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i32.i32( @@ -2263,9 +2291,11 @@ define @intrinsic_vmslt_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i32.i32( @@ -2296,9 +2326,11 @@ define @intrinsic_vmslt_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 14, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 14, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i64.i64( @@ -2329,9 +2361,11 @@ define @intrinsic_vmslt_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, -16, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, -16, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i64.i64( @@ -2362,9 +2396,11 @@ define @intrinsic_vmslt_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, -14, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, -14, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i64.i64( diff -Nru 
llvm-toolchain-12-12.0.0/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll 2021-07-09 07:09:48.000000000 +0000 @@ -1504,9 +1504,11 @@ define @intrinsic_vmsltu_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -15, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i8.i8( @@ -1537,9 +1539,11 @@ define @intrinsic_vmsltu_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -13, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i8.i8( @@ -1570,9 +1574,11 @@ define @intrinsic_vmsltu_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -11, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i8.i8( @@ -1603,9 +1609,11 @@ define @intrinsic_vmsltu_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -9, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i8.i8( @@ -1636,9 +1644,11 @@ define @intrinsic_vmsltu_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv16i8.i8( @@ -1669,9 +1679,11 @@ define @intrinsic_vmsltu_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv32i8.i8( @@ -1702,9 +1714,11 @@ define @intrinsic_vmsltu_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v 
v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -3, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i16.i16( @@ -1735,9 +1749,11 @@ define @intrinsic_vmsltu_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu -; CHECK-NEXT: vmsne.vv v9, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsne.vv v25, v8, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i16.i16( @@ -1768,9 +1784,11 @@ define @intrinsic_vmsltu_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i16.i16( @@ -1801,9 +1819,11 @@ define @intrinsic_vmsltu_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i16.i16( @@ -1834,9 +1854,11 @@ define @intrinsic_vmsltu_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv16i16.i16( @@ -1867,9 +1889,11 @@ define @intrinsic_vmsltu_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i32.i32( @@ -1900,9 +1924,11 @@ define @intrinsic_vmsltu_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i32.i32( @@ -1933,9 +1959,11 @@ define @intrinsic_vmsltu_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i32.i32( 
@@ -1966,9 +1994,11 @@ define @intrinsic_vmsltu_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i32.i32( diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll 2021-07-09 07:09:48.000000000 +0000 @@ -1801,9 +1801,11 @@ define @intrinsic_vmsltu_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -15, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i8.i8( @@ -1834,9 +1836,11 @@ define @intrinsic_vmsltu_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -13, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i8.i8( @@ -1867,9 +1871,11 @@ define @intrinsic_vmsltu_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -11, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i8.i8( @@ -1900,9 +1906,11 @@ define @intrinsic_vmsltu_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -9, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i8.i8( @@ -1933,9 +1941,11 @@ define @intrinsic_vmsltu_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv16i8.i8( @@ -1966,9 +1976,11 @@ define @intrinsic_vmsltu_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, -5, v0.t ; CHECK-NEXT: 
vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv32i8.i8( @@ -1999,9 +2011,11 @@ define @intrinsic_vmsltu_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -3, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i16.i16( @@ -2032,9 +2046,11 @@ define @intrinsic_vmsltu_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu -; CHECK-NEXT: vmsne.vv v9, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsne.vv v25, v8, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i16.i16( @@ -2065,9 +2081,11 @@ define @intrinsic_vmsltu_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i16.i16( @@ -2098,9 +2116,11 @@ define @intrinsic_vmsltu_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i16.i16( @@ -2131,9 +2151,11 @@ define @intrinsic_vmsltu_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv16i16.i16( @@ -2164,9 +2186,11 @@ define @intrinsic_vmsltu_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i32.i32( @@ -2197,9 +2221,11 @@ define @intrinsic_vmsltu_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call 
@llvm.riscv.vmsltu.mask.nxv2i32.i32( @@ -2230,9 +2256,11 @@ define @intrinsic_vmsltu_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i32.i32( @@ -2263,9 +2291,11 @@ define @intrinsic_vmsltu_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i32.i32( @@ -2296,9 +2326,11 @@ define @intrinsic_vmsltu_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 14, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 14, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i64.i64( @@ -2329,9 +2361,11 @@ define @intrinsic_vmsltu_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, -16, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, -16, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i64.i64( @@ -2362,9 +2396,11 @@ define @intrinsic_vmsltu_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, -14, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, -14, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i64.i64( diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/SystemZ/args-11.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/SystemZ/args-11.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/SystemZ/args-11.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/SystemZ/args-11.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Test outgoing promoted arguments that are split (and passed by reference). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; The i96 arg is promoted to i128 and should get the full stack space. 
+declare void @fn1(i96) +define i32 @fn2() { +; CHECK-LABEL: fn2: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -184 +; CHECK-NEXT: .cfi_def_cfa_offset 344 +; CHECK-NEXT: mvhi 180(%r15), -1 +; CHECK-NEXT: mvghi 168(%r15), 0 +; CHECK-NEXT: la %r2, 160(%r15) +; CHECK-NEXT: mvghi 160(%r15), 0 +; CHECK-NEXT: brasl %r14, fn1@PLT +; CHECK-NEXT: l %r2, 180(%r15) +; CHECK-NEXT: lmg %r14, %r15, 296(%r15) +; CHECK-NEXT: br %r14 + %1 = alloca i32 + store i32 -1, i32* %1 + call void @fn1(i96 0) + %2 = load i32, i32* %1 + ret i32 %2 +} + +declare void @fn3(i136) +define i32 @fn4() { +; CHECK-LABEL: fn4: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -192 +; CHECK-NEXT: .cfi_def_cfa_offset 352 +; CHECK-NEXT: mvhi 188(%r15), -1 +; CHECK-NEXT: mvghi 176(%r15), 0 +; CHECK-NEXT: mvghi 168(%r15), 0 +; CHECK-NEXT: la %r2, 160(%r15) +; CHECK-NEXT: mvghi 160(%r15), 0 +; CHECK-NEXT: brasl %r14, fn3@PLT +; CHECK-NEXT: l %r2, 188(%r15) +; CHECK-NEXT: lmg %r14, %r15, 304(%r15) +; CHECK-NEXT: br %r14 + %1 = alloca i32 + store i32 -1, i32* %1 + call void @fn3(i136 0) + %2 = load i32, i32* %1 + ret i32 %2 +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/SystemZ/shift-12.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/SystemZ/shift-12.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/SystemZ/shift-12.ll 2020-10-16 21:13:09.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/SystemZ/shift-12.ll 2021-07-09 07:05:01.000000000 +0000 @@ -2,7 +2,7 @@ ; Test removal of AND operations that don't affect last 6 bits of shift amount ; operand. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s ; Test that AND is not removed when some lower 6 bits are not set. 
define i32 @f1(i32 %a, i32 %sh) { @@ -119,31 +119,28 @@ ret i32 %reuse } -; Test that AND is not removed for i128 (which calls __ashlti3) define i128 @f11(i128 %a, i32 %sh) { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: -; CHECK-NEXT: stmg %r13, %r15, 104(%r15) -; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: aghi %r15, -192 -; CHECK-NEXT: .cfi_def_cfa_offset 352 ; CHECK-NEXT: lg %r0, 8(%r3) -; CHECK-NEXT: # kill: def $r4l killed $r4l def $r4d -; CHECK-NEXT: lgr %r13, %r2 ; CHECK-NEXT: lg %r1, 0(%r3) -; CHECK-NEXT: stg %r0, 168(%r15) -; CHECK-NEXT: risbg %r4, %r4, 57, 191, 0 -; CHECK-NEXT: la %r2, 176(%r15) -; CHECK-NEXT: la %r3, 160(%r15) -; CHECK-NEXT: stg %r1, 160(%r15) -; CHECK-NEXT: brasl %r14, __ashlti3@PLT -; CHECK-NEXT: lg %r0, 184(%r15) -; CHECK-NEXT: lg %r1, 176(%r15) -; CHECK-NEXT: stg %r0, 8(%r13) -; CHECK-NEXT: stg %r1, 0(%r13) -; CHECK-NEXT: lmg %r13, %r15, 296(%r15) +; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 +; CHECK-NEXT: lcr %r14, %r3 +; CHECK-NEXT: sllg %r5, %r1, 0(%r4) +; CHECK-NEXT: srlg %r14, %r0, 0(%r14) +; CHECK-NEXT: ogr %r5, %r14 +; CHECK-NEXT: sllg %r3, %r0, -64(%r3) +; CHECK-NEXT: tmll %r4, 127 +; CHECK-NEXT: locgrle %r3, %r5 +; CHECK-NEXT: sllg %r0, %r0, 0(%r4) +; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locghinle %r0, 0 +; CHECK-NEXT: stg %r0, 8(%r2) +; CHECK-NEXT: stg %r3, 0(%r2) +; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 %ext = zext i32 %and to i128 @@ -151,3 +148,62 @@ ret i128 %shift } +define i128 @f12(i128 %a, i32 %sh) { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: lg %r0, 0(%r3) +; CHECK-NEXT: lg %r1, 8(%r3) +; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 +; CHECK-NEXT: lcr %r14, %r3 +; CHECK-NEXT: srlg %r5, %r1, 0(%r4) +; CHECK-NEXT: sllg %r14, %r0, 0(%r14) +; CHECK-NEXT: ogr %r5, %r14 +; CHECK-NEXT: srlg %r3, %r0, -64(%r3) +; CHECK-NEXT: tmll %r4, 127 +; CHECK-NEXT: locgrle %r3, %r5 +; CHECK-NEXT: srlg %r0, %r0, 0(%r4) +; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locghinle %r0, 0 +; CHECK-NEXT: stg %r0, 0(%r2) +; CHECK-NEXT: stg %r3, 8(%r2) +; CHECK-NEXT: lmg %r14, %r15, 112(%r15) +; CHECK-NEXT: br %r14 + %and = and i32 %sh, 127 + %ext = zext i32 %and to i128 + %shift = lshr i128 %a, %ext + ret i128 %shift +} + +define i128 @f13(i128 %a, i32 %sh) { +; CHECK-LABEL: f13: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: lg %r0, 0(%r3) +; CHECK-NEXT: lg %r1, 8(%r3) +; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 +; CHECK-NEXT: lcr %r14, %r3 +; CHECK-NEXT: srlg %r5, %r1, 0(%r4) +; CHECK-NEXT: sllg %r14, %r0, 0(%r14) +; CHECK-NEXT: ogr %r5, %r14 +; CHECK-NEXT: srag %r14, %r0, 0(%r4) +; CHECK-NEXT: srag %r3, %r0, -64(%r3) +; CHECK-NEXT: srag %r0, %r0, 63 +; CHECK-NEXT: tmll %r4, 127 +; CHECK-NEXT: locgrle %r3, %r5 +; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locgrle %r0, %r14 +; CHECK-NEXT: stg %r0, 0(%r2) +; CHECK-NEXT: stg %r3, 8(%r2) +; CHECK-NEXT: lmg %r14, %r15, 112(%r15) +; CHECK-NEXT: br %r14 + %and = and i32 %sh, 127 + %ext = zext i32 %and to i128 + %shift = ashr i128 %a, %ext + ret i128 %shift +} + diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/block-placement.mir llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/block-placement.mir --- 
llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/block-placement.mir 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/block-placement.mir 2021-07-09 07:09:48.000000000 +0000 @@ -48,7 +48,7 @@ ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: t2WhileLoopStart killed renamable $r0, %bb.1, implicit-def dead $cpsr - ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg ; CHECK: bb.1: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; CHECK: bb.3: @@ -145,7 +145,7 @@ ; CHECK: $lr = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg ; CHECK: t2WhileLoopStart killed renamable $lr, %bb.1, implicit-def dead $cpsr - ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg ; CHECK: bb.1: ; CHECK: successors: %bb.4(0x80000000) ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll 2021-07-09 07:09:48.000000000 +0000 @@ -48,22 +48,13 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -147,22 +138,13 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> 
%broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -205,13 +187,12 @@ ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB2_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: add.w r4, r12, #3 -; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: bic r4, r4, #3 -; CHECK-NEXT: sub.w lr, r4, #4 +; CHECK-NEXT: add.w lr, r12, #3 ; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: bic lr, lr, #3 +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: sub.w lr, lr, #4 ; CHECK-NEXT: add.w lr, r4, lr, lsr #2 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -222,12 +203,11 @@ ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill ; CHECK-NEXT: vsub.i32 q1, q2, q1 -; CHECK-NEXT: adds r4, #4 +; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpsttt ; CHECK-NEXT: vcmpt.i32 eq, q1, zr ; CHECK-NEXT: vldrwt.u32 q1, [r3], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r2], #16 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB2_2 @@ -249,22 +229,13 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -304,13 +275,12 @@ ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB3_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: add.w r4, r12, #3 -; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: bic r4, r4, #3 -; CHECK-NEXT: sub.w lr, r4, #4 +; CHECK-NEXT: add.w lr, r12, #3 ; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: bic lr, lr, #3 +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: sub.w lr, lr, #4 ; CHECK-NEXT: add.w lr, r4, lr, lsr #2 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -326,9 +296,8 @@ ; CHECK-NEXT: vcmpt.i32 ne, q1, zr ; CHECK-NEXT: vldrwe.u32 q1, [r3], #16 ; CHECK-NEXT: vldrwe.u32 q2, [r2], #16 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: 
vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB3_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -348,22 +317,13 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -402,11 +362,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB4_1: @ %bb3 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB4_2: @ %bb9 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [r0] @@ -423,21 +381,12 @@ bb3: ; preds = %bb %tmp4 = add i32 %arg2, 3 %tmp5 = and i32 %tmp4, -4 - %tmp6 = add i32 %arg2, -1 - %tmp7 = insertelement <4 x i32> undef, i32 %tmp6, i32 0 - %tmp8 = shufflevector <4 x i32> %tmp7, <4 x i32> undef, <4 x i32> zeroinitializer br label %bb9 bb9: ; preds = %bb9, %bb3 %tmp10 = phi i32 [ 0, %bb3 ], [ %tmp25, %bb9 ] - %tmp11 = insertelement <4 x i32> undef, i32 %tmp10, i32 0 - %tmp12 = shufflevector <4 x i32> %tmp11, <4 x i32> undef, <4 x i32> zeroinitializer - %tmp13 = add <4 x i32> %tmp12, %tmp14 = getelementptr inbounds i32, i32* %arg1, i32 %tmp10 - - ; %tmp15 = icmp ule <4 x i32> %tmp13, %tmp8 %tmp15 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %tmp10, i32 %arg2) - %tmp16 = bitcast i32* %tmp14 to <4 x i32>* %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp16, i32 4, <4 x i1> %tmp15, <4 x i32> undef) %tmp18 = icmp ne <4 x i32> %tmp17, zeroinitializer @@ -464,7 +413,6 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB5_1: @ %bb4 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %bb12 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -472,7 +420,6 @@ ; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vcmpt.s32 le, q0, r2 ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0], #16 @@ -486,23 +433,14 @@ bb4: ; preds = %bb %tmp5 = add i32 %arg3, 3 %tmp6 = and i32 %tmp5, -4 - %tmp7 = add i32 %arg3, -1 - %tmp8 = insertelement <4 x i32> undef, i32 %tmp7, i32 0 - %tmp9 = shufflevector <4 x i32> %tmp8, <4 x i32> undef, <4 x i32> zeroinitializer %tmp10 = insertelement <4 x i32> undef, i32 %arg2, i32 0 %tmp11 = shufflevector <4 x i32> %tmp10, <4 x i32> undef, <4 x i32> zeroinitializer br label %bb12 bb12: ; preds = %bb12, %bb4 
%tmp13 = phi i32 [ 0, %bb4 ], [ %tmp30, %bb12 ] - %tmp14 = insertelement <4 x i32> undef, i32 %tmp13, i32 0 - %tmp15 = shufflevector <4 x i32> %tmp14, <4 x i32> undef, <4 x i32> zeroinitializer - %tmp16 = add <4 x i32> %tmp15, %tmp17 = getelementptr inbounds i32, i32* %arg, i32 %tmp13 - - ; %tmp18 = icmp ule <4 x i32> %tmp16, %tmp9 %tmp18= call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %tmp13, i32 %arg3) - %tmp19 = bitcast i32* %tmp17 to <4 x i32>* %tmp20 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp19, i32 4, <4 x i1> %tmp18, <4 x i32> undef) %tmp21 = icmp ne <4 x i32> %tmp20, zeroinitializer diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll 2021-07-09 07:05:01.000000000 +0000 @@ -9,11 +9,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB0_1: @ %vector.ph -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vldrb.s16 q0, [r1], #8 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vadd.i16 q0, q1, q0 @@ -28,21 +26,12 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = or <8 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - - ; %1 = icmp ule <8 x i32> %induction, %broadcast.splat11 %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <8 x i8>* %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = sext <8 x i8> %wide.masked.load to <8 x i16> @@ -69,11 +58,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vldrb.u16 q0, [r1], #8 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vadd.i16 q0, q1, q0 @@ -88,21 +75,12 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> 
%broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = or <8 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - - ; %1 = icmp ule <8 x i32> %induction, %broadcast.splat11 %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <8 x i8>* %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = zext <8 x i8> %wide.masked.load to <8 x i16> @@ -129,11 +107,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB2_1: @ %vector.ph -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrh.s32 q0, [r1], #8 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vadd.i32 q0, q1, q0 @@ -148,21 +124,12 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %wide.masked.load to <4 x i32> @@ -189,11 +156,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrh.u32 q0, [r1], #8 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vadd.i32 q0, q1, q0 @@ -208,21 +173,12 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %wide.masked.load to <4 x i32> diff -Nru 
llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll 2021-07-09 07:09:48.000000000 +0000 @@ -34,11 +34,9 @@ ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: b .LBB0_8 ; CHECK-NEXT: .LBB0_4: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 ; CHECK-NEXT: vmul.f32 q0, q1, q0 @@ -122,21 +120,12 @@ vector.ph: ; preds = %vector.memcheck %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert21 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat22 = shufflevector <4 x i32> %broadcast.splatinsert21, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %2 = getelementptr inbounds float, float* %b, i32 %index - - ; %3 = icmp ule <4 x i32> %induction, %broadcast.splat22 %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast float* %2 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %4, i32 4, <4 x i1> %3, <4 x float> undef) %5 = getelementptr inbounds float, float* %c, i32 %index @@ -225,12 +214,10 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: add.w lr, r12, r3, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpstt @@ -262,22 +249,13 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x float> [ zeroinitializer, %vector.ph ], [ %6, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %c, 
i32 %index diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll 2021-07-09 07:09:48.000000000 +0000 @@ -16,12 +16,10 @@ ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -40,9 +38,6 @@ %conv = zext i8 %a to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %conv, i32 0 %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -50,14 +45,8 @@ vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <4 x i8>* %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %2, i32 1, <4 x i1> %1, <4 x i8> undef) %3 = zext <4 x i8> %wide.masked.load to <4 x i32> @@ -92,12 +81,10 @@ ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -116,9 +103,6 @@ %conv = sext i16 %a to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %conv, i32 0 %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -126,14 +110,8 @@ vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x 
i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %wide.masked.load to <4 x i32> @@ -168,12 +146,10 @@ ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -192,9 +168,6 @@ %conv = zext i8 %a to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %conv, i32 0 %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -202,14 +175,8 @@ vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <4 x i8>* %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %2, i32 1, <4 x i1> %1, <4 x i8> undef) %3 = zext <4 x i8> %wide.masked.load to <4 x i32> @@ -244,12 +211,10 @@ ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -268,9 +233,6 @@ %conv = sext i16 %a to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %conv, i32 0 %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -278,14 +240,8 @@ vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> 
undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %wide.masked.load to <4 x i32> @@ -320,12 +276,10 @@ ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -343,9 +297,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert9 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat10 = shufflevector <4 x i32> %broadcast.splatinsert9, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %a, i32 0 %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -353,14 +304,8 @@ vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %4, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat10 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = mul nsw <4 x i32> %wide.masked.load, %broadcast.splat12 @@ -413,11 +358,9 @@ ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB5_8 ; CHECK-NEXT: .LBB5_4: @ %vector.ph -; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB5_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vldrb.u32 q1, [r1], #4 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -500,23 +443,14 @@ vector.ph: ; preds = %for.body.lr.ph %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert19 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat20 = shufflevector <4 x i32> %broadcast.splatinsert19, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert22 = insertelement <4 x i32> undef, i32 %conv3, i32 0 %broadcast.splat23 = shufflevector <4 x i32> %broadcast.splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add 
<4 x i32> %broadcast.splat, %2 = getelementptr inbounds i8, i8* %a, i32 %index - - ; %3 = icmp ule <4 x i32> %induction, %broadcast.splat20 %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i8* %2 to <4 x i8>* %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %4, i32 1, <4 x i1> %3, <4 x i8> undef) %5 = zext <4 x i8> %wide.masked.load to <4 x i32> @@ -620,11 +554,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} ; CHECK-NEXT: .LBB6_1: @ %vector.ph -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrh.s32 q0, [r0], #8 ; CHECK-NEXT: vldrh.s32 q1, [r1], #8 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -640,23 +572,14 @@ %conv3 = sext i16 %c to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %conv3, i32 0 %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %a, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat13 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %wide.masked.load to <4 x i32> @@ -711,11 +634,9 @@ ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB7_8 ; CHECK-NEXT: .LBB7_4: @ %vector.ph -; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB7_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vldrb.u32 q1, [r1], #4 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -798,23 +719,14 @@ vector.ph: ; preds = %for.body.lr.ph %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert19 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat20 = shufflevector <4 x i32> %broadcast.splatinsert19, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert22 = insertelement <4 x i32> undef, i32 %conv3, i32 0 %broadcast.splat23 = shufflevector <4 x i32> %broadcast.splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %2 = getelementptr inbounds i8, i8* %a, i32 %index - -; %3 = icmp ule <4 x i32> %induction, %broadcast.splat20 %3 = call <4 x i1> 
@llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i8* %2 to <4 x i8>* %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %4, i32 1, <4 x i1> %3, <4 x i8> undef) %5 = zext <4 x i8> %wide.masked.load to <4 x i32> @@ -918,11 +830,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrh.u32 q0, [r0], #8 ; CHECK-NEXT: vldrh.u32 q1, [r1], #8 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -938,23 +848,14 @@ %conv3 = sext i16 %c to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %conv3, i32 0 %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %a, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat13 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %wide.masked.load to <4 x i32> @@ -1009,11 +910,9 @@ ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB9_8 ; CHECK-NEXT: .LBB9_4: @ %vector.ph -; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB9_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -1095,23 +994,14 @@ vector.ph: ; preds = %vector.memcheck %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert21 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat22 = shufflevector <4 x i32> %broadcast.splatinsert21, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert24 = insertelement <4 x i32> undef, i32 %c, i32 0 %broadcast.splat25 = shufflevector <4 x i32> %broadcast.splatinsert24, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %2 = getelementptr inbounds i32, i32* %a, i32 %index - -; %3 = icmp ule <4 x i32> %induction, %broadcast.splat22 %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i32* %2 to <4 x i32>* %wide.masked.load = call <4 x i32> 
@llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %4, i32 4, <4 x i1> %3, <4 x i32> undef) %5 = getelementptr inbounds i32, i32* %b, i32 %index @@ -1202,11 +1092,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB10_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #8 ; CHECK-NEXT: vldrb.u16 q0, [r1], #8 ; CHECK-NEXT: vldrb.u16 q1, [r2], #8 ; CHECK-NEXT: vmul.i16 q0, q1, q0 @@ -1221,21 +1109,12 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - -; %1 = icmp ule <8 x i32> %induction, %broadcast.splat13 %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <8 x i8>* %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = zext <8 x i8> %wide.masked.load to <8 x i16> diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll 2021-07-09 07:05:01.000000000 +0000 @@ -9,9 +9,6 @@ ; CHECK: for.cond1.preheader.us.preheader: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TT:%.*]] = add i32 [[N_VEC]], -4 ; CHECK-NEXT: [[TT1:%.*]] = lshr i32 [[TT]], 2 ; CHECK-NEXT: [[TT2:%.*]] = add nuw nsw i32 [[TT1]], 1 @@ -30,9 +27,6 @@ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TT4]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TT5:%.*]] = phi i32 [ [[START]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT15:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TT6:%.*]] = getelementptr inbounds i16, i16* [[TT3]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 @@ 
-66,9 +60,6 @@ for.cond1.preheader.us.preheader: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert28 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat29 = shufflevector <4 x i32> %broadcast.splatinsert28, <4 x i32> undef, <4 x i32> zeroinitializer %tt = add i32 %n.vec, -4 %tt1 = lshr i32 %tt, 2 %tt2 = add nuw nsw i32 %tt1, 1 @@ -88,14 +79,8 @@ %index = phi i32 [ 0, %for.cond1.preheader.us ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ %tt4, %for.cond1.preheader.us ], [ %tt14, %vector.body ] %tt5 = phi i32 [ %start, %for.cond1.preheader.us ], [ %tt15, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tt6 = getelementptr inbounds i16, i16* %tt3, i32 %index - - ; %tt7 = icmp ule <4 x i32> %induction, %broadcast.splat29 %tt7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tt8 = bitcast i16* %tt6 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tt8, i32 2, <4 x i1> %tt7, <4 x i16> undef) %tt9 = sext <4 x i16> %wide.masked.load to <4 x i32> @@ -130,9 +115,6 @@ ; CHECK: for.cond1.preheader.us.preheader: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TT:%.*]] = add i32 [[N_VEC]], -4 ; CHECK-NEXT: [[TT1:%.*]] = lshr i32 [[TT]], 2 ; CHECK-NEXT: [[TT2:%.*]] = add nuw nsw i32 [[TT1]], 1 @@ -151,9 +133,6 @@ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TT4]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TT5:%.*]] = phi i32 [ [[START]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TT6:%.*]] = getelementptr inbounds i32, i32* [[TT3]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 @@ -185,9 +164,6 @@ for.cond1.preheader.us.preheader: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert27 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat28 = shufflevector <4 x i32> %broadcast.splatinsert27, <4 x i32> undef, <4 x i32> zeroinitializer %tt = add i32 %n.vec, -4 %tt1 = lshr i32 %tt, 2 %tt2 = add nuw nsw i32 %tt1, 1 @@ -207,14 +183,8 @@ %index = phi i32 [ 0, %for.cond1.preheader.us ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ %tt4, %for.cond1.preheader.us ], [ %tt12, %vector.body ] %tt5 = phi i32 [ %start, %for.cond1.preheader.us ], [ 
%tt13, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tt6 = getelementptr inbounds i32, i32* %tt3, i32 %index - - ; %tt7 = icmp ule <4 x i32> %induction, %broadcast.splat28 %tt7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tt8 = bitcast i32* %tt6 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tt8, i32 4, <4 x i1> %tt7, <4 x i32> undef) %tt9 = getelementptr inbounds i32, i32* %B, i32 %index diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll 2021-07-09 07:09:48.000000000 +0000 @@ -30,7 +30,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -100,7 +99,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -172,7 +170,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -242,7 +239,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -314,7 +310,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -384,7 +379,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -481,7 +475,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -510,7 +503,6 @@ vector.ph47: ; preds = %middle.block %n.rnd.up48 = add i32 %N, 3 %n.vec50 = and i32 %n.rnd.up48, -4 - %trip.count.minus.154 = add i32 %N, -1 %i11 = insertelement <4 x i32> , i32 %i10, i32 0 br label %vector.body46 @@ -594,7 +586,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -719,7 +710,6 @@ vector.ph: ; preds = %lor.end %n.rnd.up = add i32 %4, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %4, -1 %5 = insertelement <4 x i32> , i32 %0, i32 0 br label %vector.body diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll 2021-04-15 05:53:50.000000000 +0000 +++ 
llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll 2021-07-09 07:09:48.000000000 +0000 @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -enable-arm-maskedgatscat=false %s -o - | FileCheck %s define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5) { ; CHECK-LABEL: remat_vctp: diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir 2021-07-09 07:09:48.000000000 +0000 @@ -0,0 +1,330 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s +--- | + ; ModuleID = 'skip-vpt-debug.ll' + source_filename = "skip-vpt-debug.c" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabihf" + + ; Function Attrs: nofree norecurse nounwind optsize + define hidden void @arm_max_no_idx_f32(float* nocapture readonly %pSrc, i32 %blockSize, float* nocapture %pResult) local_unnamed_addr #0 !dbg !13 { + entry: + call void @llvm.dbg.value(metadata float* %pSrc, metadata !24, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.value(metadata i32 %blockSize, metadata !25, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.value(metadata float* %pResult, metadata !26, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.value(metadata float 0x3810000000000000, metadata !27, metadata !DIExpression()), !dbg !29 + %cmp.not7 = icmp eq i32 %blockSize, 0, !dbg !30 + br i1 %cmp.not7, label %while.end, label %vector.ph, !dbg !31 + + vector.ph: ; preds = %entry + %n.rnd.up = add i32 %blockSize, 3, !dbg !31 + %n.vec = and i32 %n.rnd.up, -4, !dbg !31 + %0 = add i32 %n.vec, -4, !dbg !31 + %1 = lshr i32 %0, 2, !dbg !31 + %2 = add nuw nsw i32 %1, 1, !dbg !31 + %3 = call i32 @llvm.start.loop.iterations.i32(i32 %2), !dbg !31 + br label %vector.body, !dbg !31 + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi float* [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ] + %vec.phi = phi <4 x float> [ , %vector.ph ], [ %10, %vector.body ] + %4 = phi i32 [ %3, %vector.ph ], [ %11, %vector.body ] + %5 = phi i32 [ %blockSize, %vector.ph ], [ %7, %vector.body ] + %lsr.iv12 = bitcast float* %lsr.iv1 to <4 x float>* + %6 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %5) + %7 = sub i32 %5, 4 + %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34 + %8 = fcmp nnan ninf nsz olt <4 x float> %vec.phi, %wide.masked.load, !dbg !38 + %9 = and <4 x i1> %6, %8, !dbg !40 + %10 = select <4 x i1> %9, <4 x float> %wide.masked.load, <4 x float> %vec.phi, !dbg !40 + %scevgep = getelementptr float, float* %lsr.iv1, i32 4 + %11 = call i32 @llvm.loop.decrement.reg.i32(i32 %4, i32 1) + %12 = icmp ne i32 %11, 0 + br i1 %12, label %vector.body, label %middle.block, !llvm.loop !41 + + middle.block: ; preds = %vector.body + %13 = call nnan ninf 
nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> %10), !dbg !31 + br label %while.end, !dbg !45 + + while.end: ; preds = %middle.block, %entry + %maxValue.0.lcssa = phi float [ 0x3810000000000000, %entry ], [ %13, %middle.block ], !dbg !29 + store float %maxValue.0.lcssa, float* %pResult, align 4, !dbg !45, !tbaa !34 + ret void, !dbg !46 + } + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + + ; Function Attrs: nofree nosync nounwind readnone willreturn + declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #2 + + ; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn + declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #3 + + ; Function Attrs: nofree nosync nounwind readnone willreturn + declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #2 + + ; Function Attrs: noduplicate nofree nosync nounwind willreturn + declare i32 @llvm.start.loop.iterations.i32(i32) #4 + + ; Function Attrs: noduplicate nofree nosync nounwind willreturn + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4 + + ; Function Attrs: nounwind readnone + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #5 + + attributes #0 = { nofree norecurse nounwind optsize "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-dotprod,-fp16fml,-hwdiv-arm,-i8mm,-sb,-sha2" } + attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } + attributes #2 = { nofree nosync nounwind readnone willreturn } + attributes #3 = { argmemonly nofree nosync nounwind readonly willreturn } + attributes #4 = { noduplicate nofree nosync nounwind willreturn } + attributes #5 = { nounwind readnone } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5, !6, !7, !8, !9, !10, !11} + !llvm.ident = !{!12} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "skip-vpt-debug.c", directory: "/home/vicspe01") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = !{i32 1, !"static_rwdata", i32 1} + !7 = !{i32 1, !"enumsize_buildattr", i32 2} + !8 = !{i32 1, !"armlib_unavailable", i32 0} + !9 = !{i32 1, !"branch-target-enforcement", i32 0} + !10 = !{i32 1, !"sign-return-address", i32 0} + !11 = !{i32 1, !"sign-return-address-all", i32 0} + !12 = !{!"Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]"} + !13 = distinct !DISubprogram(name: "arm_max_no_idx_f32", scope: !1, file: !1, line: 5, type: !14, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !23) + !14 = !DISubroutineType(types: !15) + !15 = !{null, !16, !20, !22} + !16 = !DIDerivedType(tag: DW_TAG_pointer_type, 
baseType: !17, size: 32) + !17 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !18) + !18 = !DIDerivedType(tag: DW_TAG_typedef, name: "float32_t", file: !1, line: 1, baseType: !19) + !19 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) + !20 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", file: !1, line: 2, baseType: !21) + !21 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) + !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 32) + !23 = !{!24, !25, !26, !27, !28} + !24 = !DILocalVariable(name: "pSrc", arg: 1, scope: !13, file: !1, line: 5, type: !16) + !25 = !DILocalVariable(name: "blockSize", arg: 2, scope: !13, file: !1, line: 5, type: !20) + !26 = !DILocalVariable(name: "pResult", arg: 3, scope: !13, file: !1, line: 6, type: !22) + !27 = !DILocalVariable(name: "maxValue", scope: !13, file: !1, line: 7, type: !18) + !28 = !DILocalVariable(name: "newVal", scope: !13, file: !1, line: 8, type: !18) + !29 = !DILocation(line: 0, scope: !13) + !30 = !DILocation(line: 10, column: 20, scope: !13) + !31 = !DILocation(line: 10, column: 3, scope: !13) + !32 = !DILocation(line: 11, column: 14, scope: !33) + !33 = distinct !DILexicalBlock(scope: !13, file: !1, line: 10, column: 26) + !34 = !{!35, !35, i64 0} + !35 = !{!"float", !36, i64 0} + !36 = !{!"omnipotent char", !37, i64 0} + !37 = !{!"Simple C/C++ TBAA"} + !38 = !DILocation(line: 12, column: 18, scope: !39) + !39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 12, column: 9) + !40 = !DILocation(line: 12, column: 9, scope: !33) + !41 = distinct !{!41, !31, !42, !43, !44} + !42 = !DILocation(line: 15, column: 3, scope: !13) + !43 = !{!"llvm.loop.mustprogress"} + !44 = !{!"llvm.loop.isvectorized", i32 1} + !45 = !DILocation(line: 16, column: 12, scope: !13) + !46 = !DILocation(line: 17, column: 1, scope: !13) + +... 
+--- +name: arm_max_no_idx_f32 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: + - id: 0 + value: float 0x3810000000000000 + alignment: 4 + isTargetSpecific: false +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: arm_max_no_idx_f32 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.4(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + ; CHECK: tCBZ renamable $r1, %bb.4, debug-location !31 + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + ; CHECK: renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, undef renamable $q0 + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1, debug-location !31 + ; CHECK: bb.2.vector.body (align 4): + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r2 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, debug-location !32 :: (load 16 from %ir.lsr.iv12, align 4, !tbaa !34) + ; CHECK: DBG_VALUE $r0, $noreg, 
!24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 + ; CHECK: MVE_VPTv4f32 8, renamable $q1, renamable $q0, 12, implicit-def $vpr, debug-location !40 + ; CHECK: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0, debug-location !40 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $q0, $r2 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31 + ; CHECK: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit killed $q0, debug-location !31 + ; CHECK: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31 + ; CHECK: tB %bb.5, 14 /* CC::al */, $noreg + ; CHECK: bb.4: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $r2 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + ; CHECK: renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; CHECK: bb.5.while.end: + ; CHECK: liveins: $r2, $s0 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store 4 into %ir.pResult, !tbaa !34) + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46 + ; CHECK: bb.6 (align 4): + ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 4 + bb.0.entry: + successors: %bb.4(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r1, $r2, $r7, $lr + + DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + tCBZ renamable $r1, %bb.4, debug-location !31 + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg, debug-location !31 + renamable $r3 = t2BICri killed 
renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg, debug-location !31 + renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg, debug-location !31 + renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg, debug-location !31 + renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, undef renamable $q0 + renamable $lr = t2DoLoopStartTP killed renamable $r3, renamable $r1, debug-location !31 + + bb.2.vector.body (align 4): + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + MVE_VPST 2, implicit $vpr, debug-location !32 + renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, debug-location !32 :: (load 16 from %ir.lsr.iv12, align 4, !tbaa !34) + DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 + renamable $vpr = MVE_VCMPf32 renamable $q1, renamable $q0, 12, 1, killed renamable $vpr, debug-location !40 + renamable $q0 = MVE_VORR killed renamable $q1, renamable $q1, 1, killed renamable $vpr, killed renamable $q0, debug-location !40 + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg + + bb.3.middle.block: + successors: %bb.5(0x80000000) + liveins: $q0, $r2 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31 + renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit $q0, debug-location !31 + renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31 + tB %bb.5, 14 /* CC::al */, $noreg + + bb.4: + successors: %bb.5(0x80000000) + liveins: $r2 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + + bb.5.while.end: + liveins: $r2, $s0 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store 4 into %ir.pResult, !tbaa !34) + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46 + + bb.6 (align 4): + CONSTPOOL_ENTRY 0, %const.0, 4 + +... 
diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll 2021-07-09 07:05:01.000000000 +0000 @@ -22,23 +22,14 @@ br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <16 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <16 x i32> %broadcast.splatinsert10, <16 x i32> undef, <16 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer - %induction = or <16 x i32> %broadcast.splat, %tmp = getelementptr inbounds i8, i8* %a, i32 %index - -; %tmp1 = icmp ule <16 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i8* %tmp to <16 x i8>* %wide.masked.load = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %tmp2, i32 4, <16 x i1> %active.lane.mask, <16 x i8> undef) %tmp3 = getelementptr inbounds i8, i8* %b, i32 %index @@ -79,23 +70,14 @@ br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp = getelementptr inbounds i16, i16* %a, i32 %index - -; %tmp1 = icmp ule <8 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i16* %tmp to <8 x i16>* %wide.masked.load = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp2, i32 4, <8 x i1> %active.lane.mask, <8 x i16> undef) %tmp3 = getelementptr inbounds i16, i16* %b, i32 %index @@ -135,20 +117,13 @@ br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ 
%index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - ; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %tmp2 = bitcast i32* %tmp to <4 x i32>* %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) @@ -190,20 +165,13 @@ br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) @@ -262,10 +230,7 @@ %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wrong = icmp ult <4 x i32> %induction, %broadcast.splat11 %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) @@ -321,10 +286,7 @@ %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wrong = icmp ult <4 x i32> %induction, %broadcast.splat11 %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) @@ -370,7 +332,6 @@ vector.ph: - %trip.count.minus.1 = add i32 %N, -1 %scevgep = getelementptr i32, i32* %A, i32 8 %scevgep30 = getelementptr i32, i32* %C, i32 8 %scevgep37 = getelementptr i32, i32* %B, i32 8 @@ -459,9 +420,7 @@ %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1719 = 
bitcast i32* %lsr.iv17 to <4 x i32>* - %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 42) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load @@ -495,7 +454,6 @@ br i1 %cmp8, label %vector.ph, label %for.cond.cleanup vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %5) br label %vector.body @@ -509,9 +467,7 @@ %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* - %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %index) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load @@ -546,7 +502,6 @@ br i1 %cmp8, label %vector.ph, label %for.cond.cleanup vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %5) br label %vector.body diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll 2021-07-09 07:05:01.000000000 +0000 @@ -10,22 +10,17 @@ ; CHECK-NEXT: [[LSR_IV14:%.*]] = phi i32* [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[LSR_IV11:%.*]] = phi i32* [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32* [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 32003, [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[LSR_IV1416:%.*]] = bitcast i32* [[LSR_IV14]] to <4 x i32>* ; CHECK-NEXT: [[LSR_IV1113:%.*]] = bitcast i32* [[LSR_IV11]] to <4 x i32>* ; CHECK-NEXT: [[LSR_IV10:%.*]] = bitcast i32* [[LSR_IV]] to <4 x i32>* -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP1]]) ; CHECK-NEXT: [[TMP3]] = sub i32 [[TMP1]], 4 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[LSR_IV10]], i32 4, <4 x i1> [[TMP2]], <4 x i32> undef) ; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[LSR_IV1113]], i32 4, <4 x i1> [[TMP2]], <4 x 
i32> undef) ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP4]], <4 x i32>* [[LSR_IV1416]], i32 4, <4 x i1> [[TMP2]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, i32* [[LSR_IV]], i32 4 ; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, i32* [[LSR_IV11]], i32 4 ; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, i32* [[LSR_IV14]], i32 4 @@ -48,13 +43,7 @@ %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - - ; %1 = icmp ult <4 x i32> %induction, %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 32003) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -244,11 +233,7 @@ %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %broadcast.splat, - -; Non-uniform constant vector here. This can't be represented with -; @llvm.get.active.lane.mask, but let's keep this test as a sanity check: %1 = icmp ult <4 x i32> %induction, - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -285,13 +270,8 @@ %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - ; BTC = UINT_MAX, and scalar trip count BTC + 1 would overflow: %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 4294967295) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -328,12 +308,7 @@ %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 32003) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x 
i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -371,13 +346,8 @@ %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - ; The induction variable %N is not an IV: %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %N, i32 32003) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -414,12 +384,7 @@ %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 32003) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -460,9 +425,6 @@ %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 32003) %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) @@ -514,10 +476,8 @@ %lsr.iv3840 = bitcast i32* %lsr.iv38 to <4 x i32>* %lsr.iv3335 = bitcast i32* %lsr.iv33 to <4 x i32>* %lsr.iv2830 = bitcast i32* %lsr.iv28 to <4 x i32>* - ; It's using %j.025, the induction variable from its outer loop: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %j.025, i32 4096) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv3840, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %wide.masked.load27 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv3335, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %1 = add nsw <4 x i32> %wide.masked.load27, %wide.masked.load diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll --- 
llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll 2021-07-09 07:05:01.000000000 +0000 @@ -82,7 +82,6 @@ br i1 %cmp8, label %vector.ph, label %for.cond.cleanup vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %5) br label %vector.body @@ -92,13 +91,10 @@ %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* - %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll 2021-07-09 07:05:01.000000000 +0000 @@ -27,7 +27,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -77,7 +76,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll 2021-07-09 07:05:01.000000000 +0000 @@ -26,7 +26,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll 2021-07-09 07:05:01.000000000 +0000 @@ -26,7 +26,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br 
label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -72,7 +71,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -118,7 +116,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -164,7 +161,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -210,7 +206,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -260,7 +255,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll 2021-07-09 07:05:01.000000000 +0000 @@ -27,7 +27,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -77,7 +76,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll 2021-07-09 07:05:01.000000000 +0000 @@ -18,8 +18,6 @@ %tmp = add i32 %N, -1 %n.rnd.up = add i32 %tmp, 8 %n.vec = and i32 %n.rnd.up, -8 - %broadcast.splatinsert1 = insertelement <8 x i32> undef, i32 %tmp, i32 0 - %broadcast.splat2 = shufflevector <8 x i32> %broadcast.splatinsert1, <8 x i32> undef, <8 x i32> zeroinitializer %0 = add i32 %n.vec, -8 %1 = lshr i32 %0, 3 %2 = add i32 %1, 1 @@ -30,14 +28,8 @@ %index = phi i32 [ 0, %vector.ph], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph], [ %tmp8, %vector.body ] %3 = phi i32 [ %start, %vector.ph], [ %4, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index - - ; %tmp3 = icmp ule <8 x i32> %induction, %broadcast.splat2 %tmp3 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp4 = bitcast i16* %tmp2 to <8 x 
i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef) %tmp5 = getelementptr inbounds i16, i16* %B, i32 %index @@ -87,8 +79,6 @@ %tmp = add i32 %N, -1 %n.rnd.up = add nuw nsw i32 %tmp, 8 %n.vec = and i32 %n.rnd.up, -8 - %broadcast.splatinsert1 = insertelement <8 x i32> undef, i32 %tmp, i32 0 - %broadcast.splat2 = shufflevector <8 x i32> %broadcast.splatinsert1, <8 x i32> undef, <8 x i32> zeroinitializer %broadcast.splatinsert3 = insertelement <8 x i16> undef, i16 %B, i32 0 %broadcast.splat4 = shufflevector <8 x i16> %broadcast.splatinsert3, <8 x i16> undef, <8 x i32> zeroinitializer %0 = add i32 %n.vec, -8 @@ -101,14 +91,8 @@ %index = phi i32 [ 0, %vector.ph], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph], [ %tmp6, %vector.body ] %3 = phi i32 [ %start, %vector.ph], [ %4, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index - - ; %tmp3 = icmp ule <8 x i32> %induction, %broadcast.splat2 %tmp3 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp4 = bitcast i16* %tmp2 to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef) %tmp5 = add <8 x i16> %vec.phi, %broadcast.splat4 @@ -151,8 +135,6 @@ %tmp = add i32 %N, -1 %n.rnd.up = add nuw nsw i32 %tmp, 8 %n.vec = and i32 %n.rnd.up, -8 - %broadcast.splatinsert1 = insertelement <8 x i32> undef, i32 %tmp, i32 0 - %broadcast.splat2 = shufflevector <8 x i32> %broadcast.splatinsert1, <8 x i32> undef, <8 x i32> zeroinitializer %broadcast.splatinsert3 = insertelement <8 x i16> undef, i16 %B, i32 0 %broadcast.splat4 = shufflevector <8 x i16> %broadcast.splatinsert3, <8 x i16> undef, <8 x i32> zeroinitializer %0 = add i32 %n.vec, -8 @@ -165,14 +147,8 @@ %index = phi i32 [ 0, %entry], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %entry], [ %tmp6, %vector.body ] %3 = phi i32 [ %start, %entry ], [ %4, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index - - ; %tmp3 = icmp ule <8 x i32> %induction, %broadcast.splat2 %tmp3 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp4 = bitcast i16* %tmp2 to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef) %tmp5 = add <8 x i16> %vec.phi, %broadcast.splat4 @@ -227,7 +203,6 @@ br i1 %cmp433, label %vector.ph, label %for.end vector.ph: ; preds = %for.body - %trip.count.minus.1 = add i32 %8, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %7) br label %vector.body diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll 
2021-07-09 07:05:01.000000000 +0000 @@ -14,23 +14,14 @@ br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp = getelementptr inbounds i16, i16* %a, i32 %index - - ; %tmp1 = icmp ule <8 x i32> %induction, %broadcast.splat11 - %tmp1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - + %tmp1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) %tmp2 = bitcast i16* %tmp to <8 x i16>* %wide.masked.load = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp2, i32 4, <8 x i1> %tmp1, <8 x i16> undef) %tmp3 = getelementptr inbounds i16, i16* %b, i32 %index @@ -72,8 +63,6 @@ vector.ph: ; preds = %entry %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer %broadcast.splatinsert10.store = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 %broadcast.splat11.store = shufflevector <4 x i32> %broadcast.splatinsert10.store, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) @@ -83,14 +72,8 @@ %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %store.idx = phi i32 [ 0, %vector.ph ], [ %store.idx.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp = getelementptr inbounds i16, i16* %a, i32 %index - - ; %tmp1 = icmp ule <8 x i32> %induction, %broadcast.splat11 %tmp1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i16* %tmp to <8 x i16>* %wide.masked.load = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp2, i32 4, <8 x i1> %tmp1, <8 x i16> undef) %tmp3 = getelementptr inbounds i16, i16* %b, i32 %index @@ -136,23 +119,14 @@ br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - 
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - - ; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll 2021-07-09 07:05:01.000000000 +0000 @@ -183,7 +183,6 @@ br i1 %cmp433, label %vector.ph, label %for.end vector.ph: ; preds = %for.body - %trip.count.minus.1 = add i32 %i8, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %i7) br label %vector.body diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll 2021-07-09 07:09:48.000000000 +0000 @@ -11,27 +11,25 @@ ; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: adds r3, r2, #3 -; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q0, [r0], #16 +; CHECK-NEXT: vldrwt.u32 q1, [r0], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 -; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: vmul.i32 q0, q2, q0 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vmul.i32 q1, q2, q1 +; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} entry: @@ -41,22 +39,13 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %6, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - 
%induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %a, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -93,7 +82,6 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r1, #4 ; CHECK-NEXT: add.w lr, r3, r1, lsr #2 -; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -101,7 +89,6 @@ ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [r0], #16 -; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: le lr, .LBB1_2 @@ -116,22 +103,13 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert9 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat10 = shufflevector <4 x i32> %broadcast.splatinsert9, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %a, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat10 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %vec.phi @@ -164,7 +142,6 @@ ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r1, #4 ; CHECK-NEXT: add.w lr, r3, r1, lsr #2 -; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -172,7 +149,6 @@ ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [r0], #16 -; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: le lr, .LBB2_2 @@ -187,22 +163,13 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert9 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat10 = shufflevector <4 x i32> %broadcast.splatinsert9, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %a, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat10 %1 = call <4 x i1> 
@llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %vec.phi @@ -228,11 +195,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vmul.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r0], #16 @@ -246,23 +211,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %c, i32 0 %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %b, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = mul nsw <4 x i32> %wide.masked.load, %broadcast.splat11 @@ -285,11 +241,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB4_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r0], #16 @@ -303,23 +257,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %c, i32 0 %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %b, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> 
@llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %broadcast.splat11 @@ -342,11 +287,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB5_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.8 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #16 ; CHECK-NEXT: vldrb.u8 q0, [r1], #16 ; CHECK-NEXT: vldrb.u8 q1, [r2], #16 ; CHECK-NEXT: vmul.i8 q0, q1, q0 @@ -361,21 +304,12 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <16 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <16 x i32> %broadcast.splatinsert12, <16 x i32> undef, <16 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer - %induction = add <16 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - - ; %1 = icmp ule <16 x i32> %induction, %broadcast.splat13 %1 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <16 x i8>* %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %2, i32 1, <16 x i1> %1, <16 x i8> undef) %3 = getelementptr inbounds i8, i8* %c, i32 %index @@ -402,11 +336,9 @@ ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB6_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #8 ; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vldrh.u16 q1, [r2], #16 ; CHECK-NEXT: vmul.i16 q0, q1, q0 @@ -421,21 +353,12 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - - ; %1 = icmp ule <8 x i32> %induction, %broadcast.splat13 %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %2, i32 2, <8 x i1> %1, <8 x i16> undef) %3 = getelementptr inbounds i16, i16* %c, i32 %index diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll 2021-02-17 08:14:30.000000000 +0000 +++ 
llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll 2021-07-09 07:05:01.000000000 +0000 @@ -30,9 +30,6 @@ br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %5) br label %vector.body @@ -44,13 +41,7 @@ %6 = phi i32 [ %start, %vector.ph ], [ %10, %vector.body ] %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>* %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - - ; %7 = icmp ule <4 x i32> %induction, %broadcast.splat12 %7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %7, <4 x i32> undef) %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %7, <4 x i32> undef) %8 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/lsll0.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/lsll0.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/lsll0.ll 2020-10-16 21:13:09.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/lsll0.ll 2021-07-09 07:05:01.000000000 +0000 @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -enable-arm-maskedgatscat=false -verify-machineinstrs %s -o - | FileCheck %s define void @_Z4loopPxS_iS_i(i64* %d) { ; CHECK-LABEL: _Z4loopPxS_iS_i: diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll 2021-07-09 07:05:01.000000000 +0000 @@ -11,11 +11,9 @@ ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 @@ -30,23 +28,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, 
%vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -74,11 +63,9 @@ ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 @@ -93,23 +80,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -138,11 +116,9 @@ ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 @@ -157,23 +133,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> 
undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -201,11 +168,9 @@ ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 @@ -220,23 +185,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat13 = shufflevector <4 x float> %broadcast.splatinsert12, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = fmul fast <4 x float> %wide.masked.load, %broadcast.splat13 @@ -265,12 +221,10 @@ ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: eor r12, r12, #-2147483648 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 @@ -286,23 +240,14 @@ %fneg = fneg fast float %a %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %fneg, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat 
= shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -329,14 +274,12 @@ ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB5_1: @ %vector.ph -; CHECK-NEXT: vmov lr, s0 -; CHECK-NEXT: vdup.32 q0, lr +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: vneg.f32 q0, q0 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -352,23 +295,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -398,11 +332,9 @@ ; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vdup.32 q0, r4 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -418,23 +350,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> 
undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -464,11 +387,9 @@ ; CHECK-NEXT: .LBB7_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vdup.32 q0, r4 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -484,23 +405,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -529,12 +441,10 @@ ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: eor r12, r12, #-2147483648 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 @@ -550,23 +460,14 @@ %fneg = fneg fast float %a %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %fneg, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = 
shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -595,11 +496,9 @@ ; CHECK-NEXT: .LBB9_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vdup.32 q0, r4 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: vfms.f32 q2, q1, q0 @@ -614,23 +513,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -659,16 +549,14 @@ ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB10_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r0], #16 -; CHECK-NEXT: vldrw.u32 q1, [r1], #16 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vneg.f32 q1, q1 -; CHECK-NEXT: vfma.f32 q1, q0, r12 -; CHECK-NEXT: vstrw.32 q1, [r2], #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vneg.f32 q0, q0 +; CHECK-NEXT: vfma.f32 q0, q1, r12 +; CHECK-NEXT: vstrw.32 q0, [r2], #16 ; CHECK-NEXT: letp lr, .LBB10_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -679,23 +567,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x 
float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -724,16 +603,14 @@ ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB11_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB11_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r0], #16 -; CHECK-NEXT: vldrw.u32 q1, [r1], #16 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vneg.f32 q1, q1 -; CHECK-NEXT: vfma.f32 q1, q0, r12 -; CHECK-NEXT: vstrw.32 q1, [r2], #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vneg.f32 q0, q0 +; CHECK-NEXT: vfma.f32 q0, q1, r12 +; CHECK-NEXT: vstrw.32 q0, [r2], #16 ; CHECK-NEXT: letp lr, .LBB11_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -744,23 +621,14 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat13 = shufflevector <4 x float> %broadcast.splatinsert12, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = fmul fast <4 x float> %wide.masked.load, %broadcast.splat13 diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll 2021-07-09 07:09:48.000000000 +0000 @@ -244,7 +244,6 @@ ; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 ; CHECK-NEXT: .long 0 @ 0x0 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -288,7 
+287,6 @@ ; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 ; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 vector.ph41: ; preds = %for.body6.preheader - %ind.end47 = shl i32 %n.vec43, 1 br label %vector.body39 vector.body39: ; preds = %vector.body39, %vector.ph41 diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll 2021-07-09 07:05:01.000000000 +0000 @@ -1,18 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py - - -; RUN: opt --arm-mve-gather-scatter-lowering -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -S -o 2>/dev/null - | FileCheck %s +; RUN: opt --arm-mve-gather-scatter-lowering -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -S -o - | FileCheck %s define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_add_sub_block( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[N_VEC:%.*]], 1 ; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[PUSHEDOUTADD]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY_END]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX]], 50 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX]], 48 ; CHECK-NEXT: br i1 [[TMP0]], label [[LOWER_BLOCK:%.*]], label [[END:%.*]] ; CHECK: lower.block: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[DATA:%.*]], <4 x i32> [[VEC_IND]], i32 32, i32 2, i32 1) @@ -23,20 +20,19 @@ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: br label [[VECTOR_BODY_END]] ; CHECK: vector.body.end: -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[TMP4]], label [[END]], label [[VECTOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; vector.ph: - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body.end ] %vec.ind = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body.end ] - %0 = icmp eq i32 %index, 50 + %0 = icmp eq i32 %index, 48 br i1 %0, label %lower.block, label %end lower.block: ; preds = %vector.body @@ -61,7 +57,6 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_mul_sub_block( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[N_VEC:%.*]], 1 ; CHECK-NEXT: [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> , ; CHECK-NEXT: [[PRODUCT:%.*]] = mul <4 x i32> , ; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], @@ -69,7 +64,7 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[PUSHEDOUTADD]], [[VECTOR_PH]] ], [ [[INCREMENTPUSHEDOUTMUL:%.*]], [[VECTOR_BODY_END]] ] -; 
CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX]], 50 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX]], 48 ; CHECK-NEXT: br i1 [[TMP0]], label [[LOWER_BLOCK:%.*]], label [[END:%.*]] ; CHECK: lower.block: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[DATA:%.*]], <4 x i32> [[VEC_IND]], i32 32, i32 2, i32 1) @@ -80,20 +75,19 @@ ; CHECK-NEXT: br label [[VECTOR_BODY_END]] ; CHECK: vector.body.end: ; CHECK-NEXT: [[INCREMENTPUSHEDOUTMUL]] = add <4 x i32> [[VEC_IND]], [[PRODUCT]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[TMP4]], label [[END]], label [[VECTOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; vector.ph: - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body.end ] %vec.ind = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body.end ] - %0 = icmp eq i32 %index, 50 + %0 = icmp eq i32 %index, 48 br i1 %0, label %lower.block, label %end lower.block: ; preds = %vector.body @@ -120,7 +114,6 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_mul_sub_loop( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[N_VEC:%.*]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] @@ -138,19 +131,18 @@ ; CHECK-NEXT: br label [[VECTOR_2_BODY_END:%.*]] ; CHECK: vector.2.body.end: ; CHECK-NEXT: [[INDEX_2_NEXT:%.*]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_2_NEXT]], 15 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_2_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_BODY_END]], label [[VECTOR_2_BODY]] ; CHECK: vector.body.end: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[TMP6]], label [[END:%.*]], label [[VECTOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; vector.ph: - %ind.end = shl i32 %n.vec, 2 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -162,7 +154,6 @@ br label %vector.2.body vector.2.body: ; preds = %vector.body - %index.2 = phi i32 [ 0, %vector.2.ph ], [ %index.2.next, %vector.2.body.end ] %0 = mul <4 x i32> %vec.ind, %1 = add <4 x i32> %0, %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1 @@ -174,7 +165,7 @@ vector.2.body.end: ; preds = %lower.block %index.2.next = add i32 %index, 4 - %5 = icmp eq i32 %index.2.next, 15 + %5 = icmp eq i32 %index.2.next, 16 br i1 %5, label %vector.body.end, label %vector.2.body vector.body.end: ; preds = %lower.block @@ -187,4 +178,48 @@ ret void; } +define arm_aapcs_vfpcc void @invariant_add(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +; CHECK-LABEL: @invariant_add( +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[L0:%.*]] = mul <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[L1:%.*]] = add <4 x i32> [[L0]], [[VEC_IND]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[DATA:%.*]], <4 x i32> [[L1]], i32 32, i32 2, i32 1) +; CHECK-NEXT: [[L3:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[L4:%.*]] = bitcast i32* [[L3]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP0]], <4 x i32>* [[L4]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[L5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] +; CHECK-NEXT: br i1 [[L5]], label [[END:%.*]], label [[VECTOR_BODY]] +; CHECK: end: +; CHECK-NEXT: ret void +; + +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.ind = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] + %l0 = mul <4 x i32> %vec.ind, + %l1 = add <4 x i32> %l0, %vec.ind + %l2 = getelementptr inbounds i32, i32* %data, <4 x i32> %l1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %l2, i32 4, <4 x i1> , <4 x i32> undef) + %l3 = getelementptr inbounds i32, i32* %dst, i32 %index + %l4 = bitcast i32* %l3 to <4 x i32>* + store <4 x i32> %wide.masked.gather, <4 x i32>* %l4, align 4 + %index.next = add i32 %index, 4 + %vec.ind.next = add <4 x i32> %vec.ind, + %l5 = icmp eq i32 %index.next, %n.vec + br i1 %l5, label %end, label %vector.body + +end: + ret void; +} + + declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll 2021-07-09 07:09:48.000000000 +0000 @@ -39,7 +39,6 @@ ; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -83,7 +82,6 @@ ; CHECK-NEXT: .long 16 @ 0x10 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -127,7 +125,6 @@ ; CHECK-NEXT: .long 0 @ 0x0 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -173,7 +170,6 @@ <4 x i32> %to.store) { vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -215,7 +211,6 @@ <4 x i32> %to.store) { vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -259,7 +254,6 @@ i32* noalias nocapture %dst, i32 %n.vec) { vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -301,7 +295,6 @@ ; CHECK-NEXT: .long 16 @ 0x10 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -328,26 +321,29 @@ ret void; } -define arm_aapcs_vfpcc void 
@non_gatscat_use1(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @non_gatscat_use1(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec, <4 x i32>* %x) { ; CHECK-LABEL: non_gatscat_use1: ; CHECK: @ %bb.0: @ %vector.ph -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: adr r3, .LCPI7_0 -; CHECK-NEXT: vmov.i32 q0, #0x8 -; CHECK-NEXT: vldrw.u32 q2, [r3] +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: adr.w r12, .LCPI7_0 +; CHECK-NEXT: vmov.i32 q0, #0x9 +; CHECK-NEXT: vldrw.u32 q3, [r12] ; CHECK-NEXT: vmov.i32 q1, #0xc +; CHECK-NEXT: vmov.i32 q2, #0x8 ; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vadd.i32 q3, q2, q0 -; CHECK-NEXT: vmlas.u32 q2, q1, r0 -; CHECK-NEXT: vldrw.u32 q4, [q2, #24] +; CHECK-NEXT: vadd.i32 q4, q3, q2 +; CHECK-NEXT: vmul.i32 q5, q3, q0 +; CHECK-NEXT: vmlas.u32 q3, q1, r0 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q2, q3 -; CHECK-NEXT: vstrb.8 q4, [r1], #16 +; CHECK-NEXT: vldrw.u32 q6, [q3, #24] +; CHECK-NEXT: vmov q3, q4 +; CHECK-NEXT: vstrw.32 q5, [r3] +; CHECK-NEXT: vstrb.8 q6, [r1], #16 ; CHECK-NEXT: bne .LBB7_1 ; CHECK-NEXT: @ %bb.2: @ %end -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: @@ -358,7 +354,6 @@ ; CHECK-NEXT: .long 6 @ 0x6 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -372,6 +367,7 @@ %4 = bitcast i32* %3 to <4 x i32>* store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 %non_gatscat_use = mul <4 x i32> %0, + store <4 x i32> %non_gatscat_use, <4 x i32>* %x, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, %5 = icmp eq i32 %index.next, %n.vec @@ -381,26 +377,31 @@ ret void; } -define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec, <4 x i32>* %x) { ; CHECK-LABEL: non_gatscat_use2: ; CHECK: @ %bb.0: @ %vector.ph -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: adr r3, .LCPI8_0 -; CHECK-NEXT: vmov.i32 q0, #0x8 -; CHECK-NEXT: vldrw.u32 q2, [r3] -; CHECK-NEXT: vmov.i32 q1, #0xc +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: adr.w r12, .LCPI8_0 +; CHECK-NEXT: vmov.i32 q0, #0x12 +; CHECK-NEXT: vldrw.u32 q4, [r12] +; CHECK-NEXT: vmov.i32 q1, #0x9 +; CHECK-NEXT: vmov.i32 q2, #0x8 +; CHECK-NEXT: vmov.i32 q3, #0xc ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vadd.i32 q3, q2, q0 -; CHECK-NEXT: vmlas.u32 q2, q1, r0 -; CHECK-NEXT: vldrw.u32 q4, [q2, #24] +; CHECK-NEXT: vadd.i32 q5, q4, q2 +; CHECK-NEXT: vmul.i32 q6, q4, q1 +; CHECK-NEXT: vmlas.u32 q4, q3, r0 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q2, q3 -; CHECK-NEXT: vstrb.8 q4, [r1], #16 +; CHECK-NEXT: vldrw.u32 q7, [q4, #24] +; CHECK-NEXT: vadd.i32 q4, q6, q0 +; CHECK-NEXT: vstrw.32 q4, [r3] +; CHECK-NEXT: vmov q4, q5 +; CHECK-NEXT: vstrb.8 q7, [r1], #16 ; CHECK-NEXT: bne .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %end -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vpop {d8, d9, d10, 
d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: @@ -411,7 +412,6 @@ ; CHECK-NEXT: .long 6 @ 0x6 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -425,6 +425,7 @@ %4 = bitcast i32* %3 to <4 x i32>* store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 %non_gatscat_use = mul <4 x i32> %1, + store <4 x i32> %non_gatscat_use, <4 x i32>* %x, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, %5 = icmp eq i32 %index.next, %n.vec @@ -528,7 +529,6 @@ %2 = add nuw i32 %1, 1 %min.iters.check = icmp ult i32 %0, 6 %n.vec = and i32 %2, -4 - %ind.end = shl i32 %n.vec, 1 %broadcast.splatinsert86 = insertelement <4 x i32> undef, i32 %m, i32 0 %broadcast.splat87 = shufflevector <4 x i32> %broadcast.splatinsert86, <4 x i32> undef, <4 x i32> zeroinitializer %cmp.n = icmp eq i32 %2, %n.vec @@ -854,12 +854,12 @@ ; CHECK-NEXT: add.w r8, r7, #10 ; CHECK-NEXT: adr r7, .LCPI11_0 ; CHECK-NEXT: ldr r1, [sp, #96] -; CHECK-NEXT: vdup.32 q1, r2 -; CHECK-NEXT: vldrw.u32 q0, [r7] +; CHECK-NEXT: vdup.32 q0, r2 +; CHECK-NEXT: vldrw.u32 q1, [r7] ; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: mov.w r9, #6 ; CHECK-NEXT: movs r6, #11 -; CHECK-NEXT: vshl.i32 q1, q1, #2 +; CHECK-NEXT: vshl.i32 q0, q0, #2 ; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: .LBB11_1: @ %for.body10.i ; CHECK-NEXT: @ =>This Loop Header: Depth=1 @@ -894,10 +894,10 @@ ; CHECK-NEXT: mul r4, r11, r6 ; CHECK-NEXT: vdup.32 q3, r5 ; CHECK-NEXT: vdup.32 q2, r7 -; CHECK-NEXT: vadd.i32 q4, q0, r4 +; CHECK-NEXT: vadd.i32 q4, q1, r4 ; CHECK-NEXT: vmla.u32 q3, q4, r2 ; CHECK-NEXT: adds r4, #113 -; CHECK-NEXT: vadd.i32 q4, q0, r4 +; CHECK-NEXT: vadd.i32 q4, q1, r4 ; CHECK-NEXT: mov r4, r8 ; CHECK-NEXT: vmla.u32 q2, q4, r2 ; CHECK-NEXT: .LBB11_5: @ %vector.body @@ -907,8 +907,8 @@ ; CHECK-NEXT: @ Parent Loop BB11_4 Depth=4 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=5 ; CHECK-NEXT: vldrb.s32 q6, [r0, q2] -; CHECK-NEXT: vadd.i32 q5, q2, q1 -; CHECK-NEXT: vadd.i32 q4, q3, q1 +; CHECK-NEXT: vadd.i32 q5, q2, q0 +; CHECK-NEXT: vadd.i32 q4, q3, q0 ; CHECK-NEXT: subs r4, #4 ; CHECK-NEXT: vadd.i32 q2, q6, r2 ; CHECK-NEXT: vldrb.s32 q6, [r1, q3] @@ -978,7 +978,6 @@ br i1 0, label %for.cond.cleanup20.i, label %for.cond22.preheader.lr.ph.i for.cond22.preheader.lr.ph.i: ; preds = %for.body10.i - %ind.end = add nsw i32 0, %n.vec %.splatinsert = insertelement <4 x i32> undef, i32 0, i32 0 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %.splat, diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-gather-unused.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-gather-unused.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-gather-unused.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-gather-unused.ll 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s + +; This file has some unused gathers, making sure that they do not cause +; problems as the function gets simplified. 
+ +define arm_aapcs_vfpcc void @unused1(<4 x i32*> %offs) { +; CHECK-LABEL: unused1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + ret void +} + +define arm_aapcs_vfpcc void @unused2(<4 x i32*> %offs) { +; CHECK-LABEL: unused2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %gather1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + %gather2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + ret void +} + +define arm_aapcs_vfpcc void @unused2_used(<4 x i32*> %offs) { +; CHECK-LABEL: unused2_used: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %gather1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + %gather2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + %unused = add <4 x i32> %gather1, %gather2 + ret void +} + + +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-phireg.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-phireg.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-phireg.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-phireg.ll 2021-07-09 07:09:48.000000000 +0000 @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat=false -verify-machineinstrs %s -o - | FileCheck %s ; verify-machineinstrs previously caught the incorrect use of QPR in the stack reloads. 
diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll 2021-07-09 07:09:48.000000000 +0000 @@ -170,8 +170,8 @@ define arm_aapcs_vfpcc <4 x i32> @cmpugez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpugez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.i32 ne, q0, zr -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vcmp.i32 eq, q0, zr +; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: bx lr entry: %c1 = icmp eq <4 x i32> %a, zeroinitializer diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-selectcc.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-selectcc.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-selectcc.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-selectcc.ll 2021-07-09 07:09:48.000000000 +0000 @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat=false -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK define arm_aapcs_vfpcc <4 x i32> @test_v4i32(i32 %x, <4 x i32> %s0, <4 x i32> %s1) { ; CHECK-LABEL: test_v4i32: diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll 2021-07-09 07:09:48.000000000 +0000 @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +; This test has an instruction that gets sunk into the loop, that is an +; active.lane.mask operand (%exitcount.ptrcnt.to.int = ptrtoint). We +; need to make sure it is loop invariant. 
+ +define i32 @a(i32* readnone %b, i8* %c) { +; CHECK-LABEL: a: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: it ls +; CHECK-NEXT: popls {r4, pc} +; CHECK-NEXT: .LBB0_1: @ %while.body.preheader +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: add.w r2, r0, #15 +; CHECK-NEXT: mov r12, r1 +; CHECK-NEXT: bic r2, r2, #15 +; CHECK-NEXT: subs r2, #16 +; CHECK-NEXT: add.w lr, r3, r2, lsr #4 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB0_2: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: adds r3, r1, r2 +; CHECK-NEXT: vctp.8 r0 +; CHECK-NEXT: vmov.8 q0[0], r3 +; CHECK-NEXT: adds r4, r3, #1 +; CHECK-NEXT: vmov.8 q0[1], r4 +; CHECK-NEXT: adds r4, r3, #2 +; CHECK-NEXT: vmov.8 q0[2], r4 +; CHECK-NEXT: adds r4, r3, #3 +; CHECK-NEXT: vmov.8 q0[3], r4 +; CHECK-NEXT: adds r4, r3, #4 +; CHECK-NEXT: vmov.8 q0[4], r4 +; CHECK-NEXT: adds r4, r3, #5 +; CHECK-NEXT: vmov.8 q0[5], r4 +; CHECK-NEXT: adds r4, r3, #6 +; CHECK-NEXT: vmov.8 q0[6], r4 +; CHECK-NEXT: adds r4, r3, #7 +; CHECK-NEXT: vmov.8 q0[7], r4 +; CHECK-NEXT: add.w r4, r3, #8 +; CHECK-NEXT: vmov.8 q0[8], r4 +; CHECK-NEXT: add.w r4, r3, #9 +; CHECK-NEXT: vmov.8 q0[9], r4 +; CHECK-NEXT: add.w r4, r3, #10 +; CHECK-NEXT: vmov.8 q0[10], r4 +; CHECK-NEXT: add.w r4, r3, #11 +; CHECK-NEXT: vmov.8 q0[11], r4 +; CHECK-NEXT: add.w r4, r3, #12 +; CHECK-NEXT: vmov.8 q0[12], r4 +; CHECK-NEXT: add.w r4, r3, #13 +; CHECK-NEXT: vmov.8 q0[13], r4 +; CHECK-NEXT: add.w r4, r3, #14 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: subs r0, #16 +; CHECK-NEXT: vmov.8 q0[14], r4 +; CHECK-NEXT: adds r3, #15 +; CHECK-NEXT: vmov.8 q0[15], r3 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrbt.8 q0, [r12], #16 +; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r4, pc} +entry: + %0 = bitcast i32* %b to i8* + %cmp3 = icmp ugt i8* %0, %c + br i1 %cmp3, label %while.body.preheader, label %while.end + +while.body.preheader: ; preds = %entry + %c5 = ptrtoint i8* %c to i32 + %1 = sub i32 0, %c5 + %uglygep = getelementptr i8, i8* %0, i32 %1 + %exitcount.ptrcnt.to.int = ptrtoint i8* %uglygep to i32 + %n.rnd.up = add i32 %exitcount.ptrcnt.to.int, 15 + %n.vec = and i32 %n.rnd.up, -16 + br label %vector.body + +vector.body: ; preds = %vector.body, %while.body.preheader + %index = phi i32 [ 0, %while.body.preheader ], [ %index.next, %vector.body ] + %next.gep = getelementptr i8, i8* %c, i32 %index + %2 = or i32 %index, 1 + %next.gep7 = getelementptr i8, i8* %c, i32 %2 + %3 = or i32 %index, 2 + %next.gep8 = getelementptr i8, i8* %c, i32 %3 + %4 = or i32 %index, 3 + %next.gep9 = getelementptr i8, i8* %c, i32 %4 + %5 = or i32 %index, 4 + %next.gep10 = getelementptr i8, i8* %c, i32 %5 + %6 = or i32 %index, 5 + %next.gep11 = getelementptr i8, i8* %c, i32 %6 + %7 = or i32 %index, 6 + %next.gep12 = getelementptr i8, i8* %c, i32 %7 + %8 = or i32 %index, 7 + %next.gep13 = getelementptr i8, i8* %c, i32 %8 + %9 = or i32 %index, 8 + %next.gep14 = getelementptr i8, i8* %c, i32 %9 + %10 = or i32 %index, 9 + %next.gep15 = getelementptr i8, i8* %c, i32 %10 + %11 = or i32 %index, 10 + %next.gep16 = getelementptr i8, i8* %c, i32 %11 + %12 = or i32 %index, 11 + %next.gep17 = getelementptr i8, i8* %c, i32 %12 + %13 = or i32 %index, 12 + %next.gep18 = getelementptr i8, i8* %c, i32 %13 + %14 = or i32 %index, 13 + %next.gep19 = getelementptr i8, i8* %c, i32 %14 + %15 = or i32 %index, 14 + %next.gep20 = getelementptr i8, i8* %c, i32 
%15 + %16 = or i32 %index, 15 + %next.gep21 = getelementptr i8, i8* %c, i32 %16 + %17 = insertelement <16 x i8*> poison, i8* %next.gep, i32 0 + %18 = insertelement <16 x i8*> %17, i8* %next.gep7, i32 1 + %19 = insertelement <16 x i8*> %18, i8* %next.gep8, i32 2 + %20 = insertelement <16 x i8*> %19, i8* %next.gep9, i32 3 + %21 = insertelement <16 x i8*> %20, i8* %next.gep10, i32 4 + %22 = insertelement <16 x i8*> %21, i8* %next.gep11, i32 5 + %23 = insertelement <16 x i8*> %22, i8* %next.gep12, i32 6 + %24 = insertelement <16 x i8*> %23, i8* %next.gep13, i32 7 + %25 = insertelement <16 x i8*> %24, i8* %next.gep14, i32 8 + %26 = insertelement <16 x i8*> %25, i8* %next.gep15, i32 9 + %27 = insertelement <16 x i8*> %26, i8* %next.gep16, i32 10 + %28 = insertelement <16 x i8*> %27, i8* %next.gep17, i32 11 + %29 = insertelement <16 x i8*> %28, i8* %next.gep18, i32 12 + %30 = insertelement <16 x i8*> %29, i8* %next.gep19, i32 13 + %31 = insertelement <16 x i8*> %30, i8* %next.gep20, i32 14 + %32 = insertelement <16 x i8*> %31, i8* %next.gep21, i32 15 + %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %exitcount.ptrcnt.to.int) + %33 = ptrtoint <16 x i8*> %32 to <16 x i32> + %34 = trunc <16 x i32> %33 to <16 x i8> + %35 = bitcast i8* %next.gep to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %34, <16 x i8>* %35, i32 1, <16 x i1> %active.lane.mask) + %index.next = add i32 %index, 16 + %36 = icmp eq i32 %index.next, %n.vec + br i1 %36, label %while.end, label %vector.body + +while.end: ; preds = %vector.body, %entry + ret i32 undef +} + +declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll 2021-07-09 07:09:48.000000000 +0000 @@ -1711,7 +1711,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1762,7 +1761,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1816,7 +1814,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1868,7 +1865,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1924,7 +1920,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1976,7 +1971,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2032,7 +2026,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 
%n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2083,7 +2076,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2137,7 +2129,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2189,7 +2180,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2245,7 +2235,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2296,7 +2285,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2352,7 +2340,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2406,7 +2393,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2465,7 +2451,6 @@ vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir 2021-07-09 07:05:01.000000000 +0000 @@ -0,0 +1,110 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-none-unknown-eabihf" + + define <4 x i32> @test(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) #0 !dbg !5 { + entry: + call void @llvm.dbg.value(metadata <4 x i32> %x, metadata !17, metadata !DIExpression()), !dbg !21 + call void @llvm.dbg.value(metadata <4 x i32> %y, metadata !18, metadata !DIExpression()), !dbg !21 + call void @llvm.dbg.value(metadata <4 x i32> %z, metadata !19, metadata !DIExpression()), !dbg !21 + %0 = icmp sle <4 x i32> %x, %y, !dbg !22 + call void @llvm.dbg.value(metadata i32 undef, metadata !20, metadata !DIExpression()), !dbg !21 + %1 = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i1> %0, <4 x i32> %z), !dbg !23 + call void @llvm.dbg.value(metadata <4 x i32> %1, metadata !19, metadata !DIExpression()), !dbg !21 + %2 = icmp sgt <4 x i32> %x, %y, !dbg !24 + call void @llvm.dbg.value(metadata i32 undef, metadata !20, metadata !DIExpression()), !dbg !21 + %3 = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i1> 
%2, <4 x i32> %1), !dbg !25 + call void @llvm.dbg.value(metadata <4 x i32> %3, metadata !19, metadata !DIExpression()), !dbg !21 + ret <4 x i32> %3, !dbg !26 + } + + declare <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1 + declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + + attributes #0 = { "target-features"="+fullfp16,+lob,+mve.fp" } + attributes #1 = { nounwind readnone "target-features"="+fullfp16,+lob,+mve.fp" } + attributes #2 = { nofree nosync nounwind readnone speculatable willreturn "target-features"="+fullfp16,+lob,+mve.fp" } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project 921572a18dc9b97c259bda2ce8130f04b2ebe3ed)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "tmp.c", directory: "/work/llvm-project/build") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 2, type: !6, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16) + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8, !8, !8} + !8 = !DIDerivedType(tag: DW_TAG_typedef, name: "int32x4_t", file: !9, line: 28, baseType: !10) + !9 = !DIFile(filename: "lib/clang/13.0.0/include/arm_mve.h", directory: "/work/llvm-project/build") + !10 = !DICompositeType(tag: DW_TAG_array_type, baseType: !11, size: 128, flags: DIFlagVector, elements: !14) + !11 = !DIDerivedType(tag: DW_TAG_typedef, name: "int32_t", file: !12, line: 58, baseType: !13) + !12 = !DIFile(filename: "pb-rel/testabletools/include/stdint.h", directory: "/work") + !13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !14 = !{!15} + !15 = !DISubrange(count: 4) + !16 = !{!17, !18, !19, !20} + !17 = !DILocalVariable(name: "x", arg: 1, scope: !5, file: !1, line: 2, type: !8) + !18 = !DILocalVariable(name: "y", arg: 2, scope: !5, file: !1, line: 2, type: !8) + !19 = !DILocalVariable(name: "z", arg: 3, scope: !5, file: !1, line: 2, type: !8) + !20 = !DILocalVariable(name: "p", scope: !5, file: !1, line: 3, type: !13) + !21 = !DILocation(line: 0, scope: !5) + !22 = !DILocation(line: 3, column: 11, scope: !5) + !23 = !DILocation(line: 4, column: 7, scope: !5) + !24 = !DILocation(line: 5, column: 7, scope: !5) + !25 = !DILocation(line: 6, column: 7, scope: !5) + !26 = !DILocation(line: 7, column: 3, scope: !5) + +... 
+--- +name: test +tracksRegLiveness: true +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q1, $noreg, !18, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q1, $noreg, !18, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q2, implicit-def $d4, implicit-def $s8, implicit-def $s9, implicit-def $d5, implicit-def $s10, implicit-def $s11, implicit killed $q1, implicit killed $q0, implicit killed $q2, debug-location !23 { + ; CHECK: MVE_VPTv4s32 12, renamable $q1, renamable $q0, 10, implicit-def $vpr, debug-location !23 + ; CHECK: renamable $q2 = MVE_VADDi32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q2, debug-location !23 + ; CHECK: DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE internal $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: renamable $q2 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 2, internal killed renamable $vpr, internal killed renamable $q2, debug-location !25 + ; CHECK: DBG_VALUE internal $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: } + ; CHECK: $q0 = MVE_VORR killed $q2, killed $q2, 0, $noreg, undef $q0, debug-location !26 + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0, debug-location !26 + DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 + DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 + DBG_VALUE $q1, $noreg, !18, !DIExpression(), debug-location !21 + DBG_VALUE $q1, $noreg, !18, !DIExpression(), debug-location !21 + DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + renamable $vpr = MVE_VCMPs32 renamable $q1, renamable $q0, 10, 0, $noreg, debug-location !22 + DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + renamable $q2 = MVE_VADDi32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q2, debug-location !23 + DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg, debug-location !24 + renamable $q2 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q2, debug-location !25 + DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + $q0 = MVE_VORR killed $q2, killed $q2, 0, $noreg, undef $q0, debug-location !26 + tBX_RET 14 /* CC::al */, $noreg, implicit $q0, debug-location !26 + +... 
diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll 2021-07-09 07:09:48.000000000 +0000 @@ -70,8 +70,6 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) { ; CHECK-LABEL: vqdmulh_i16_c: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q2, q0 ; CHECK-NEXT: vmov.u16 r0, q0[2] ; CHECK-NEXT: vmov.u16 r1, q0[0] @@ -87,35 +85,32 @@ ; CHECK-NEXT: vmov q3[3], q3[1], r1, r0 ; CHECK-NEXT: vmov.u16 r1, q2[4] ; CHECK-NEXT: vmullb.s16 q0, q3, q0 -; CHECK-NEXT: vmov.i32 q3, #0x7fff ; CHECK-NEXT: vshl.i32 q0, q0, #10 ; CHECK-NEXT: vshr.s32 q0, q0, #10 -; CHECK-NEXT: vshr.s32 q0, q0, #15 -; CHECK-NEXT: vmin.s32 q4, q0, q3 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vshr.s32 q3, q0, #15 +; CHECK-NEXT: vmov r0, s12 ; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: vmov r0, s13 ; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r0, s14 ; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r0, s15 ; CHECK-NEXT: vmov.16 q0[3], r0 ; CHECK-NEXT: vmov.u16 r0, q2[6] -; CHECK-NEXT: vmov q4[2], q4[0], r1, r0 +; CHECK-NEXT: vmov q3[2], q3[0], r1, r0 ; CHECK-NEXT: vmov.u16 r0, q2[7] ; CHECK-NEXT: vmov.u16 r1, q2[5] -; CHECK-NEXT: vmov q4[3], q4[1], r1, r0 +; CHECK-NEXT: vmov q3[3], q3[1], r1, r0 ; CHECK-NEXT: vmov.u16 r0, q1[6] ; CHECK-NEXT: vmov.u16 r1, q1[4] ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0 ; CHECK-NEXT: vmov.u16 r0, q1[7] ; CHECK-NEXT: vmov.u16 r1, q1[5] ; CHECK-NEXT: vmov q2[3], q2[1], r1, r0 -; CHECK-NEXT: vmullb.s16 q1, q2, q4 +; CHECK-NEXT: vmullb.s16 q1, q2, q3 ; CHECK-NEXT: vshl.i32 q1, q1, #10 ; CHECK-NEXT: vshr.s32 q1, q1, #10 ; CHECK-NEXT: vshr.s32 q1, q1, #15 -; CHECK-NEXT: vmin.s32 q1, q1, q3 ; CHECK-NEXT: vmov r0, s4 ; CHECK-NEXT: vmov.16 q0[4], r0 ; CHECK-NEXT: vmov r0, s5 @@ -124,7 +119,6 @@ ; CHECK-NEXT: vmov.16 q0[6], r0 ; CHECK-NEXT: vmov r0, s7 ; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %l2 = sext <8 x i16> %s0 to <8 x i22> diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll 2021-07-09 07:09:48.000000000 +0000 @@ -68,6 +68,9 @@ } ; Test invoke instruction with filters (functions with throw(...) declaration) +; Currently we don't support exception specifications correctly in JS glue code, +; so we ignore all filters here. +; See https://bugs.llvm.org/show_bug.cgi?id=50396. 
define void @filter() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @filter( entry: @@ -91,12 +94,9 @@ %2 = extractvalue { i8*, i32 } %0, 1 br label %filter.dispatch ; CHECK: lpad: -; CHECK-NEXT: %[[FMC:.*]] = call i8* @__cxa_find_matching_catch_4(i8* bitcast (i8** @_ZTIi to i8*), i8* bitcast (i8** @_ZTIc to i8*)) -; CHECK-NEXT: %[[IVI1:.*]] = insertvalue { i8*, i32 } undef, i8* %[[FMC]], 0 -; CHECK-NEXT: %[[TEMPRET0_VAL:.*]] = call i32 @getTempRet0() -; CHECK-NEXT: %[[IVI2:.*]] = insertvalue { i8*, i32 } %[[IVI1]], i32 %[[TEMPRET0_VAL]], 1 -; CHECK-NEXT: extractvalue { i8*, i32 } %[[IVI2]], 0 -; CHECK-NEXT: extractvalue { i8*, i32 } %[[IVI2]], 1 +; We now temporarily ignore filters because of the bug, so we pass nothing to +; __cxa_find_matching_catch +; CHECK-NEXT: %[[FMC:.*]] = call i8* @__cxa_find_matching_catch_2() filter.dispatch: ; preds = %lpad %ehspec.fails = icmp slt i32 %2, 0 diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/X86/freeze.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/X86/freeze.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/X86/freeze.ll 2020-10-16 21:13:09.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/X86/freeze.ll 2021-07-09 07:09:48.000000000 +0000 @@ -122,3 +122,26 @@ %t1 = add i64 %v1, %v2 ret i64 %t1 } + +; Make sure we emit a movl to zext the input before the imulq. This previously +; failed because freeze was not listed in the instructions that don't zext their +; result in the def32 pattern X86InstrCompiler.td. +define i32 @freeze_zext(i64 %a) nounwind { +; X86ASM-LABEL: freeze_zext: +; X86ASM: # %bb.0: # %entry +; X86ASM-NEXT: movq %rdi, %rax +; X86ASM-NEXT: movl %eax, %ecx +; X86ASM-NEXT: movl $3435973837, %edx # imm = 0xCCCCCCCD +; X86ASM-NEXT: imulq %rcx, %rdx +; X86ASM-NEXT: shrq $35, %rdx +; X86ASM-NEXT: addl %edx, %edx +; X86ASM-NEXT: leal (%rdx,%rdx,4), %ecx +; X86ASM-NEXT: subl %ecx, %eax +; X86ASM-NEXT: # kill: def $eax killed $eax killed $rax +; X86ASM-NEXT: retq +entry: + %x = trunc i64 %a to i32 + %y = freeze i32 %x + %z = urem i32 %y, 10 + ret i32 %z +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/X86/horizontal-shuffle-3.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/X86/horizontal-shuffle-3.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/X86/horizontal-shuffle-3.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/X86/horizontal-shuffle-3.ll 2021-07-09 07:05:01.000000000 +0000 @@ -98,6 +98,17 @@ ret <8 x i32> %7 } +define <4 x double> @PR49971(<4 x double> %0) { +; CHECK-LABEL: PR49971: +; CHECK: ## %bb.0: +; CHECK-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %2 = tail call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %0, <4 x double> %0) + %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> + ret <4 x double> %3 +} + declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/X86/stores-merging.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/X86/stores-merging.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/X86/stores-merging.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/X86/stores-merging.ll 2021-07-09 07:05:02.000000000 +0000 @@ -14,7 +14,7 @@ ; CHECK-LABEL: redundant_stores_merging: ; CHECK: # %bb.0: ; 
CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001 -; CHECK-NEXT: movq %rax, e+{{.*}}(%rip) +; CHECK-NEXT: movq %rax, e+4(%rip) ; CHECK-NEXT: retq store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4 store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4 @@ -27,8 +27,8 @@ ; CHECK-LABEL: redundant_stores_merging_reverse: ; CHECK: # %bb.0: ; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001 -; CHECK-NEXT: movq %rax, e+{{.*}}(%rip) -; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8 +; CHECK-NEXT: movq %rax, e+4(%rip) +; CHECK-NEXT: movl $456, e+8(%rip) # imm = 0x1C8 ; CHECK-NEXT: retq store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4 store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4 @@ -46,8 +46,8 @@ define dso_local void @overlapping_stores_merging() { ; CHECK-LABEL: overlapping_stores_merging: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $1, {{.*}}(%rip) -; CHECK-NEXT: movw $2, b+{{.*}}(%rip) +; CHECK-NEXT: movl $1, b(%rip) +; CHECK-NEXT: movw $2, b+3(%rip) ; CHECK-NEXT: retq store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2 store i16 2, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 3) to i16*), align 1 @@ -612,3 +612,89 @@ store i32 %t0, i32* %p1, align 4 ret void } + +; https://llvm.org/PR50623 +; It is a miscompile to merge the stores if we are not +; writing all of the bytes from the source value. + +define void @merge_hole(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole: +; CHECK: # %bb.0: +; CHECK-NEXT: movb %dil, (%rsi) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, 2(%rsi) +; CHECK-NEXT: retq + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + ret void +} + +; Change the order of the stores. +; It is a miscompile to merge the stores if we are not +; writing all of the bytes from the source value. + +define void @merge_hole2(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole2: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: movw %ax, 2(%rsi) +; CHECK-NEXT: movb %dil, (%rsi) +; CHECK-NEXT: retq + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p, align 1 + ret void +} + +; Change offset. +; It is a miscompile to merge the stores if we are not +; writing all of the bytes from the source value. + +define void @merge_hole3(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole3: +; CHECK: # %bb.0: +; CHECK-NEXT: movb %dil, 1(%rsi) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, 2(%rsi) +; CHECK-NEXT: retq + %p1 = getelementptr inbounds i8, i8* %p, i64 1 + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p1, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + ret void +} + +; Change offset. +; It is a miscompile to merge the stores if we are not +; writing all of the bytes from the source value. 
+ +define void @merge_hole4(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole4: +; CHECK: # %bb.0: +; CHECK-NEXT: movb %dil, 2(%rsi) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, (%rsi) +; CHECK-NEXT: retq + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i8, i8* %p, i64 2 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p2, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %pcast, align 1 + ret void +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/CodeGen/X86/tailcc-ssp.ll llvm-toolchain-12-12.0.1/llvm/test/CodeGen/X86/tailcc-ssp.ll --- llvm-toolchain-12-12.0.0/llvm/test/CodeGen/X86/tailcc-ssp.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/CodeGen/X86/tailcc-ssp.ll 2021-07-09 07:05:02.000000000 +0000 @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=x86_64-windows-msvc %s -o - -verify-machineinstrs | FileCheck %s + +declare void @h(i8*, i64, i8*) + +define tailcc void @tailcall_frame(i8* %0, i64 %1) sspreq { +; CHECK-LABEL: tailcall_frame: +; CHECK: callq __security_check_cookie +; CHECK: xorl %ecx, %ecx +; CHECK: jmp h + + tail call tailcc void @h(i8* null, i64 0, i8* null) + ret void +} + +declare void @bar() +define void @tailcall_unrelated_frame() sspreq { +; CHECK-LABEL: tailcall_unrelated_frame: +; CHECK: subq [[STACK:\$.*]], %rsp +; CHECK: callq bar +; CHECK: callq __security_check_cookie +; CHECK: addq [[STACK]], %rsp +; CHECK: jmp bar + call void @bar() + tail call void @bar() + ret void +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/DebugInfo/implicit-const-test2.s llvm-toolchain-12-12.0.1/llvm/test/DebugInfo/implicit-const-test2.s --- llvm-toolchain-12-12.0.0/llvm/test/DebugInfo/implicit-const-test2.s 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/DebugInfo/implicit-const-test2.s 2021-07-09 07:05:02.000000000 +0000 @@ -0,0 +1,34 @@ +# REQUIRES: x86-registered-target + +# RUN: llvm-mc --filetype=obj --triple=x86_64-pc-linux %s -o %t.o -g + +# RUN: llvm-dwarfdump -v %t.o | FileCheck %s + +# CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_no +# CHECK-NEXT: DW_AT_language DW_FORM_implicit_const 29 + +# CHECK: 0x0000000c: DW_TAG_compile_unit [1] +# CHECK-NEXT: DW_AT_language [DW_FORM_implicit_const] (DW_LANG_C11) + + .section .debug_info,"",@progbits +.Ldebug_info0: + .long .Ldebug_info0_end - .Ldebug_info0_start # Length of Compilation Unit Info +.Ldebug_info0_start: + .value 0x5 # DWARF version number + .byte 0x1 # DW_UT_compile + .byte 0x8 # Pointer Size (in bytes) + .long .Ldebug_abbrev0 # Offset Into Abbrev. 
Section + .uleb128 0x1 # (DIE DW_TAG_compile_unit) + # DW_AT_language +.Ldebug_info0_end: + .section .debug_abbrev,"",@progbits +.Ldebug_abbrev0: + .uleb128 0x1 # (abbrev code) + .uleb128 0x11 # (TAG: DW_TAG_compile_unit) + .byte 0x0 # DW_children_no + .uleb128 0x13 # (DW_AT_language) + .uleb128 0x21 # (DW_FORM_implicit_const) + .sleb128 0x1d + .byte 0 + .byte 0 + .byte 0 diff -Nru llvm-toolchain-12-12.0.0/llvm/test/MC/Mips/elf-relsym.s llvm-toolchain-12-12.0.1/llvm/test/MC/Mips/elf-relsym.s --- llvm-toolchain-12-12.0.0/llvm/test/MC/Mips/elf-relsym.s 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/MC/Mips/elf-relsym.s 2021-07-09 07:09:49.000000000 +0000 @@ -4,10 +4,16 @@ // CHECK: Symbols [ // CHECK: Symbol { -// CHECK: Name: .rodata.cst8 +// CHECK: Name: $.str // CHECK: } // CHECK: Symbol { -// CHECK: Name: .rodata.str1.1 +// CHECK: Name: $.str1 +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: $CPI0_0 +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: $CPI0_1 // CHECK: } // CHECK: ] diff -Nru llvm-toolchain-12-12.0.0/llvm/test/MC/Mips/mips_lo16.s llvm-toolchain-12-12.0.1/llvm/test/MC/Mips/mips_lo16.s --- llvm-toolchain-12-12.0.0/llvm/test/MC/Mips/mips_lo16.s 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/MC/Mips/mips_lo16.s 2021-07-09 07:05:02.000000000 +0000 @@ -0,0 +1,22 @@ +# PR49821: Check that R_MIPS_LO16 relocs do not wrap around with large addends. + +# RUN: llvm-mc %s -triple mips-unknown-unknown -filetype=obj | \ +# RUN: llvm-objdump -d -r --no-show-raw-insn - | \ +# RUN: FileCheck -check-prefix=MIPS32 %s + +# RUN: llvm-mc %s -triple mips64-unknown-unknown -filetype=obj | \ +# RUN: llvm-objdump -d -r --no-show-raw-insn - | \ +# RUN: FileCheck -check-prefix=MIPS64 %s + + .text +foo: + lui $2, %hi(bar) +# MIPS32: 00000000: R_MIPS_HI16 bar +# MIPS64: 0000000000000000: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE .rodata.str1.1+0x8000 + addiu $2, $2, %lo(bar) +# MIPS32: 00000004: R_MIPS_LO16 bar +# MIPS64: 0000000000000004: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE .rodata.str1.1+0x8000 + .section .rodata.str1.1,"aMS",@progbits,1 + .zero 0x8000 +bar: + .asciz "hello" diff -Nru llvm-toolchain-12-12.0.0/llvm/test/MC/Mips/xgot.s llvm-toolchain-12-12.0.1/llvm/test/MC/Mips/xgot.s --- llvm-toolchain-12-12.0.0/llvm/test/MC/Mips/xgot.s 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/MC/Mips/xgot.s 2021-07-09 07:05:02.000000000 +0000 @@ -10,8 +10,8 @@ // CHECK: 0x1C R_MIPS_GOT_LO16 ext_1 // CHECK: 0x24 R_MIPS_CALL_HI16 printf // CHECK: 0x30 R_MIPS_CALL_LO16 printf -// CHECK: 0x2C R_MIPS_GOT16 .rodata.str1.1 -// CHECK: 0x38 R_MIPS_LO16 .rodata.str1.1 +// CHECK: 0x2C R_MIPS_GOT16 $.str +// CHECK: 0x38 R_MIPS_LO16 $.str // CHECK: ] .text diff -Nru llvm-toolchain-12-12.0.0/llvm/test/tools/llvm-cov/branch-templates.cpp llvm-toolchain-12-12.0.1/llvm/test/tools/llvm-cov/branch-templates.cpp --- llvm-toolchain-12-12.0.0/llvm/test/tools/llvm-cov/branch-templates.cpp 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/tools/llvm-cov/branch-templates.cpp 2021-07-09 07:05:03.000000000 +0000 @@ -1,9 +1,9 @@ // RUN: llvm-profdata merge %S/Inputs/branch-templates.proftext -o %t.profdata // RUN: llvm-cov show --show-expansions --show-branches=count %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s // RUN: llvm-cov report --show-branch-summary %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -show-functions -path-equivalence=/tmp,%S %s | FileCheck %s 
-check-prefix=REPORT +// RUN: llvm-cov report --show-branch-summary %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s -check-prefix=REPORTFILE #include - template void unused(T x) { return; @@ -45,3 +45,17 @@ // REPORT-NEXT: _Z4funcIfEiT_ 5 2 60.00% 7 3 57.14% 2 1 50.00% // REPORT-NEXT: --- // REPORT-NEXT: TOTAL 22 7 68.18% 31 11 64.52% 12 6 50.00% + +// Make sure the covered branch tally for the function instantiation group is +// merged to reflect maximum branch coverage of a single instantiation, just +// like what is done for lines and regions. Also, the total branch tally +// summary for an instantiation group should agree with the total number of +// branches in the definition (In this case, 2 and 6 for func<>() and main(), +// respectively). This is returned by: FunctionCoverageSummary::get(const +// InstantiationGroup &Group, ...) + +// REPORTFILE: Filename Regions Missed Regions Cover Functions Missed Functions Executed Lines Missed Lines Cover Branches Missed Branches Cover +// REPORTFILE-NEXT: --- +// REPORTFILE-NEXT: branch-templates.cpp 12 3 75.00% 2 0 100.00% 17 4 76.47% 8 4 50.00% +// REPORTFILE-NEXT: --- +// REPORTFILE-NEXT: TOTAL 12 3 75.00% 2 0 100.00% 17 4 76.47% 8 4 50.00% diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll 2021-07-09 07:09:49.000000000 +0000 @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s -; RUN: opt -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS +; RUN: opt -O1 -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s +; RUN: opt -O1 -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS define void @atomic_swap_f16(half* %ptr, half %val) nounwind { ; CHECK-LABEL: @atomic_swap_f16( diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/ConstantMerge/dont-merge.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/ConstantMerge/dont-merge.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/ConstantMerge/dont-merge.ll 2020-10-16 21:13:10.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/ConstantMerge/dont-merge.ll 2021-07-09 07:05:02.000000000 +0000 @@ -80,3 +80,15 @@ store i32* @T4D2, i32** %P8 ret void } + +; CHECK: @T5tls +; CHECK: @T5ua + +@T5tls = private thread_local constant i32 555 +@T5ua = private unnamed_addr constant i32 555 + +define void @test5(i32** %P1, i32** %P2) { + store i32* @T5tls, i32** %P1 + store i32* @T5ua, i32** %P2 + ret void +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/Coroutines/coro-byval-param.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/Coroutines/coro-byval-param.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/Coroutines/coro-byval-param.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/Coroutines/coro-byval-param.ll 2021-07-09 07:09:49.000000000 +0000 @@ -0,0 +1,127 @@ +; RUN: opt < %s -passes=coro-split -S | FileCheck %s +%promise_type = type { i8 } +%struct.A = type <{ i64, i64, i32, [4 x i8] }> + 
+; Function Attrs: noinline ssp uwtable mustprogress +define %promise_type* @foo(%struct.A* nocapture readonly byval(%struct.A) align 8 %a1) #0 { +entry: + %__promise = alloca %promise_type, align 1 + %a2 = alloca %struct.A, align 8 + %0 = getelementptr inbounds %promise_type, %promise_type* %__promise, i64 0, i32 0 + %1 = call token @llvm.coro.id(i32 16, i8* nonnull %0, i8* bitcast (%promise_type* (%struct.A*)* @foo to i8*), i8* null) + %2 = call i1 @llvm.coro.alloc(token %1) + br i1 %2, label %coro.alloc, label %coro.init + +coro.alloc: ; preds = %entry + %3 = call i64 @llvm.coro.size.i64() + %call = call noalias nonnull i8* @_Znwm(i64 %3) #9 + br label %coro.init + +coro.init: ; preds = %coro.alloc, %entry + %4 = phi i8* [ null, %entry ], [ %call, %coro.alloc ] + %5 = call i8* @llvm.coro.begin(token %1, i8* %4) #10 + %6 = bitcast %struct.A* %a1 to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0) #2 + %call2 = call %promise_type* @_ZN4task12promise_type17get_return_objectEv(%promise_type* nonnull dereferenceable(1) %__promise) + call void @initial_suspend(%promise_type* nonnull dereferenceable(1) %__promise) + %7 = call token @llvm.coro.save(i8* null) + call fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8* %5) #2 + %8 = call i8 @llvm.coro.suspend(token %7, i1 false) + switch i8 %8, label %coro.ret [ + i8 0, label %init.ready + i8 1, label %cleanup33 + ] + +init.ready: ; preds = %coro.init + %9 = bitcast %struct.A* %a2 to i8* + call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %9) #2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %6, i64 24, i1 false) + call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %9) #2 + call void @_ZN4task12promise_type13final_suspendEv(%promise_type* nonnull dereferenceable(1) %__promise) #2 + %10 = call token @llvm.coro.save(i8* null) + call fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8* %5) #2 + %11 = call i8 @llvm.coro.suspend(token %10, i1 true) #10 + %switch = icmp ult i8 %11, 2 + br i1 %switch, label %cleanup33, label %coro.ret + +cleanup33: ; preds = %init.ready, %coro.init + call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0) #2 + %12 = call i8* @llvm.coro.free(token %1, i8* %5) + %.not = icmp eq i8* %12, null + br i1 %.not, label %coro.ret, label %coro.free + +coro.free: ; preds = %cleanup33 + call void @_ZdlPv(i8* nonnull %12) #2 + br label %coro.ret + +coro.ret: ; preds = %coro.free, %cleanup33, %init.ready, %coro.init + %13 = call i1 @llvm.coro.end(i8* null, i1 false) #10 + ret %promise_type* %call2 +} + +; check that the frame contains the entire struct, instead of just the struct pointer +; CHECK: %foo.Frame = type { void (%foo.Frame*)*, void (%foo.Frame*)*, %promise_type, %struct.A, i1 } + +; Function Attrs: argmemonly nounwind readonly +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 + +; Function Attrs: nounwind +declare i1 @llvm.coro.alloc(token) #2 + +; Function Attrs: nobuiltin nofree allocsize(0) +declare nonnull i8* @_Znwm(i64) local_unnamed_addr #3 + +; Function Attrs: nounwind readnone +declare i64 @llvm.coro.size.i64() #4 + +; Function Attrs: nounwind +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void 
@llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #6 + +; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress +declare %promise_type* @_ZN4task12promise_type17get_return_objectEv(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2 + +; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress +declare void @initial_suspend(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2 + +; Function Attrs: nounwind +declare token @llvm.coro.save(i8*) #2 + +; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress +declare hidden fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8*) unnamed_addr #7 align 2 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 + +; Function Attrs: nounwind +declare i8 @llvm.coro.suspend(token, i1) #2 + +; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress +declare void @_ZN4task12promise_type13final_suspendEv(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2 + +; Function Attrs: nounwind +declare i1 @llvm.coro.end(i8*, i1) #2 + +; Function Attrs: nobuiltin nounwind +declare void @_ZdlPv(i8*) local_unnamed_addr #8 + +; Function Attrs: argmemonly nounwind readonly +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 + +attributes #0 = { noinline ssp uwtable mustprogress "coroutine.presplit"="1" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nobuiltin nofree allocsize(0) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind readnone } +attributes #5 = { argmemonly nofree nosync nounwind willreturn } +attributes #6 = { argmemonly nofree nounwind willreturn } +attributes #7 = { noinline nounwind ssp uwtable willreturn mustprogress "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #8 = { nobuiltin nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #9 = { allocsize(0) } +attributes #10 = { noduplicate } + diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/Coroutines/coro-noalias-param.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/Coroutines/coro-noalias-param.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/Coroutines/coro-noalias-param.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/Coroutines/coro-noalias-param.ll 2021-07-09 07:05:02.000000000 +0000 @@ -0,0 +1,40 @@ +; RUN: opt < %s -S -passes=coro-early | FileCheck %s +%struct.A = type <{ i64, i64, i32, [4 x i8] }> + +define void @f(%struct.A* nocapture readonly noalias align 8 %a) { + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + 
%size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + call void @print(i32 0) + %s1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %s1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @print(i32 1) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret void +} + +; check that the noalias attribute is removed from the argument +; CHECK: define void @f(%struct.A* nocapture readonly align 8 %a) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i8* @llvm.coro.begin(token, i8*) +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @print(i32) +declare void @free(i8*) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/GVN/storeinvgroup.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/GVN/storeinvgroup.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/GVN/storeinvgroup.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/GVN/storeinvgroup.ll 2021-07-09 07:05:02.000000000 +0000 @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -gvn -S -o - < %s | FileCheck %s + +define double @code(double* %a1) { +; CHECK-LABEL: @code( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[META:%.*]] = alloca double*, align 8 +; CHECK-NEXT: store double 1.234500e+00, double* [[A1:%.*]], align 8 +; CHECK-NEXT: store double* [[A1]], double** [[META]], align 8, !invariant.group !0 +; CHECK-NEXT: ret double 1.234500e+00 +; +entry: + %meta = alloca double* + store double 1.23450000e+00, double* %a1, align 8 + store double* %a1, double** %meta, align 8, !invariant.group !0 + %iload = load double, double* %a1, align 8, !invariant.group !1 + ret double %iload +} + +!0 = distinct !{} +!1 = distinct !{} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll 2020-10-16 21:13:10.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll 2021-07-09 07:05:02.000000000 +0000 @@ -436,3 +436,59 @@ } declare void @side_effect() + +; The exit condition %outer.cond.1 depends on a phi in %inner. Make sure we do +; not incorrectly determine %x.lcssa <= -1. 
+define i32 @exit_cond_depends_on_inner_loop() { +; CHECK-LABEL: @exit_cond_depends_on_inner_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[IV_OUTER:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_OUTER_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ -1, [[OUTER_HEADER]] ], [ [[CALL:%.*]], [[INNER]] ] +; CHECK-NEXT: [[CALL]] = call i32 @match() +; CHECK-NEXT: [[INNER_COND:%.*]] = icmp sgt i32 [[CALL]], -1 +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER]], label [[OUTER_EXITING_1:%.*]] +; CHECK: outer.exiting.1: +; CHECK-NEXT: [[X_LCSSA:%.*]] = phi i32 [ [[X]], [[INNER]] ] +; CHECK-NEXT: [[OUTER_COND_1:%.*]] = icmp sgt i32 [[X_LCSSA]], -1 +; CHECK-NEXT: br i1 [[OUTER_COND_1]], label [[EXIT:%.*]], label [[OUTER_LATCH]] +; CHECK: outer.latch: +; CHECK-NEXT: [[IV_OUTER_NEXT]] = add nuw nsw i32 [[IV_OUTER]], 1 +; CHECK-NEXT: [[OUTER_COND_2:%.*]] = icmp ult i32 [[IV_OUTER]], 100 +; CHECK-NEXT: br i1 [[OUTER_COND_2]], label [[OUTER_HEADER]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[X_RES:%.*]] = phi i32 [ [[X_LCSSA]], [[OUTER_EXITING_1]] ], [ -1, [[OUTER_LATCH]] ] +; CHECK-NEXT: ret i32 [[X_RES]] +; +entry: + br label %outer.header + +outer.header: + %iv.outer = phi i32 [ 0, %entry ], [ %iv.outer.next , %outer.latch ] + br label %inner + +inner: + %x = phi i32 [ -1, %outer.header ], [ %call, %inner ] + %call = call i32 @match() + %inner.cond = icmp sgt i32 %call, -1 + br i1 %inner.cond, label %inner, label %outer.exiting.1 + +outer.exiting.1: + %x.lcssa = phi i32 [ %x, %inner ] + %outer.cond.1 = icmp sgt i32 %x.lcssa, -1 + br i1 %outer.cond.1, label %exit, label %outer.latch + +outer.latch: + %iv.outer.next = add nuw nsw i32 %iv.outer, 1 + %outer.cond.2 = icmp ult i32 %iv.outer, 100 + br i1 %outer.cond.2, label %outer.header, label %exit + +exit: + %x.res = phi i32 [ %x.lcssa, %outer.exiting.1 ], [ -1, %outer.latch ] + ret i32 %x.res +} + +declare i32 @match() diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll 2021-07-09 07:05:02.000000000 +0000 @@ -373,4 +373,66 @@ br i1 %loopcond, label %loopexit, label %loop } +define void @promote_latch_condition_decrementing_loop_05(i32* %p, i32* %a, i1 %cond) { +; CHECK-LABEL: @promote_latch_condition_decrementing_loop_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG0]] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: [[LEN_MINUS_1:%.*]] = add nsw i32 [[LEN]], -1 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[IV_START:%.*]] = phi i32 [ [[LEN]], [[IF_TRUE]] ], [ [[LEN_MINUS_1]], [[IF_FALSE]] ] +; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0 +; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[IV_START]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loopexit.loopexit: +; CHECK-NEXT: br label [[LOOPEXIT]] +; CHECK: loopexit: +; CHECK-NEXT: 
ret void
+; CHECK: loop:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
+; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4
+; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
+;
+
+entry:
+ %len = load i32, i32* %p, align 4, !range !0
+ br i1 %cond, label %if.true, label %if.false
+
+if.true:
+ br label %merge
+
+if.false:
+ %len.minus.1 = add nsw i32 %len, -1
+ br label %merge
+
+merge:
+ %iv_start = phi i32 [ %len, %if.true ], [%len.minus.1, %if.false ]
+ %zero_check = icmp eq i32 %len, 0
+ br i1 %zero_check, label %loopexit, label %preheader
+
+preheader:
+ br label %loop
+
+loopexit:
+ ret void
+
+loop:
+ %iv = phi i32 [ %iv.next, %loop ], [ %iv_start, %preheader ]
+ %iv.wide = zext i32 %iv to i64
+ %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide
+ store atomic i32 0, i32* %el unordered, align 4
+ %iv.next = add nsw i32 %iv, -1
+ %loopcond = icmp slt i32 %iv, 1
+ br i1 %loopcond, label %loopexit, label %loop
+}
+
 !0 = !{i32 0, i32 2147483647}
diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/Inline/pr50270.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/Inline/pr50270.ll
--- llvm-toolchain-12-12.0.0/llvm/test/Transforms/Inline/pr50270.ll 1970-01-01 00:00:00.000000000 +0000
+++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/Inline/pr50270.ll 2021-07-09 07:05:02.000000000 +0000
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -inline < %s | FileCheck %s
+
+; This tests cases where instructions in the callee are simplified to
+; instructions in the caller, thus making VMap contain instructions from
+; the caller. We should not be assigning incorrect noalias metadata in
+; that case.
+
+declare { i64* } @opaque_callee()
+
+define { i64* } @callee(i64* %x) {
+; CHECK-LABEL: @callee(
+; CHECK-NEXT: [[RES:%.*]] = insertvalue { i64* } undef, i64* [[X:%.*]], 0
+; CHECK-NEXT: ret { i64* } [[RES]]
+;
+ %res = insertvalue { i64* } undef, i64* %x, 0
+ ret { i64* } %res
+}
+
+; @opaque_callee() should not receive noalias metadata here.
+define void @caller() {
+; CHECK-LABEL: @caller(
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0)
+; CHECK-NEXT: [[S:%.*]] = call { i64* } @opaque_callee()
+; CHECK-NEXT: [[X:%.*]] = extractvalue { i64* } [[S]], 0
+; CHECK-NEXT: ret void
+;
+ call void @llvm.experimental.noalias.scope.decl(metadata !0)
+ %s = call { i64* } @opaque_callee()
+ %x = extractvalue { i64* } %s, 0
+ call { i64* } @callee(i64* %x), !noalias !0
+ ret void
+}
+
+; @opaque_callee() should receive the same noalias metadata as the load from the
+; else branch, not as the load in the if branch.
+define { i64* } @self_caller(i1 %c, i64* %a) { +; CHECK-LABEL: @self_caller( +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[S:%.*]] = call { i64* } @opaque_callee(), !noalias !0 +; CHECK-NEXT: [[X:%.*]] = extractvalue { i64* } [[S]], 0 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !3) +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i64, i64* [[X]], align 4, !alias.scope !3 +; CHECK-NEXT: ret { i64* } [[S]] +; CHECK: else: +; CHECK-NEXT: [[R2:%.*]] = insertvalue { i64* } undef, i64* [[A:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = load volatile i64, i64* [[A]], align 4, !alias.scope !0 +; CHECK-NEXT: ret { i64* } [[R2]] +; + call void @llvm.experimental.noalias.scope.decl(metadata !0) + br i1 %c, label %if, label %else + +if: + %s = call { i64* } @opaque_callee(), !noalias !0 + %x = extractvalue { i64* } %s, 0 + %r = call { i64* } @self_caller(i1 false, i64* %x) + ret { i64* } %r + +else: + %r2 = insertvalue { i64* } undef, i64* %a, 0 + load volatile i64, i64* %a, !alias.scope !0 + ret { i64* } %r2 +} + +declare void @llvm.experimental.noalias.scope.decl(metadata) + +!0 = !{!1} +!1 = !{!1, !2, !"scope"} +!2 = !{!2, !"domain"} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/InstCombine/not-add.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/InstCombine/not-add.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/InstCombine/not-add.ll 2021-02-17 08:14:30.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/InstCombine/not-add.ll 2021-07-09 07:09:49.000000000 +0000 @@ -137,3 +137,31 @@ %nota = xor <4 x i32> %a, ret <4 x i32> %nota } + +define i32 @pr50308(i1 %c1, i32 %v1, i32 %v2, i32 %v3) { +; CHECK-LABEL: @pr50308( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: [[ADD_NOT:%.*]] = sub i32 -2, [[V1:%.*]] +; CHECK-NEXT: [[ADD1_NEG:%.*]] = xor i32 [[ADD_NOT]], [[V2:%.*]] +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND_NEG:%.*]] = phi i32 [ [[ADD1_NEG]], [[COND_TRUE]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[COND_NEG]], [[V3:%.*]] +; CHECK-NEXT: ret i32 [[SUB]] +; +entry: + br i1 %c1, label %cond.true, label %cond.end + +cond.true: + %add = add nsw i32 1, %v1 + %xor = xor i32 %add, %v2 + %add1 = add nsw i32 1, %xor + br label %cond.end + +cond.end: + %cond = phi i32 [ %add1, %cond.true ], [ 0, %entry ] + %sub = sub nsw i32 %v3, %cond + ret i32 %sub +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll 2021-07-09 07:05:02.000000000 +0000 @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; PR49778: this should not be folded to 0. 
+define i32 @src(i1 %x2) { +; CHECK-LABEL: @src( +; CHECK-NEXT: [[X13:%.*]] = zext i1 [[X2:%.*]] to i32 +; CHECK-NEXT: [[_7:%.*]] = shl i32 -1, [[X13]] +; CHECK-NEXT: [[MASK:%.*]] = xor i32 [[_7]], -1 +; CHECK-NEXT: [[_8:%.*]] = and i32 [[MASK]], [[X13]] +; CHECK-NEXT: [[_9:%.*]] = shl i32 [[_8]], [[X13]] +; CHECK-NEXT: ret i32 [[_9]] +; + %x13 = zext i1 %x2 to i32 + %_7 = shl i32 4294967295, %x13 + %mask = xor i32 %_7, 4294967295 + %_8 = and i32 %mask, %x13 + %_9 = shl i32 %_8, %x13 + ret i32 %_9 +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/InstCombine/select-binop-cmp.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/InstCombine/select-binop-cmp.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/InstCombine/select-binop-cmp.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/InstCombine/select-binop-cmp.ll 2021-07-09 07:09:49.000000000 +0000 @@ -551,10 +551,12 @@ ret i32 %C } +; Value equivalence substitution is all-or-nothing, so needs a scalar compare. + define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @select_xor_icmp_vec_bad( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[Z:%.*]], +; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]] ; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[C]] ; @@ -564,6 +566,21 @@ ret <2 x i8> %C } +; Value equivalence substitution is all-or-nothing, so needs a scalar compare. + +define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x) { +; CHECK-LABEL: @vec_select_no_equivalence( +; CHECK-NEXT: [[X10:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer +; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[X10]], <2 x i32> [[X]] +; CHECK-NEXT: ret <2 x i32> [[S]] +; + %x10 = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> + %cond = icmp eq <2 x i32> %x, zeroinitializer + %s = select <2 x i1> %cond, <2 x i32> %x10, <2 x i32> %x + ret <2 x i32> %s +} + ; Folding this would only be legal if we sanitized undef to 0. 
define <2 x i8> @select_xor_icmp_vec_undef(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @select_xor_icmp_vec_undef( diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/InstSimplify/select.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/InstSimplify/select.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/InstSimplify/select.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/InstSimplify/select.ll 2021-07-09 07:09:49.000000000 +0000 @@ -969,6 +969,19 @@ ret %s } +define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @vec_select_no_equivalence( +; CHECK-NEXT: [[X10:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer +; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[X10]], <2 x i32> zeroinitializer +; CHECK-NEXT: ret <2 x i32> [[S]] +; + %x10 = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> + %cond = icmp eq <2 x i32> %x, zeroinitializer + %s = select <2 x i1> %cond, <2 x i32> %x10, <2 x i32> zeroinitializer + ret <2 x i32> %s +} + ; TODO: these can be optimized more define i32 @poison(i32 %x, i32 %y) { diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/IRCE/decrementing-loop.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/IRCE/decrementing-loop.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/IRCE/decrementing-loop.ll 2021-04-15 05:53:50.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/IRCE/decrementing-loop.ll 2021-07-09 07:09:49.000000000 +0000 @@ -212,16 +212,17 @@ ret void } +; TODO: we need to be more careful when trying to look through phi nodes in +; cycles, because the condition to prove may reference the previous value of +; the phi. So we currently fail to optimize this case. ; Check that we can figure out that IV is non-negative via implication through ; two Phi nodes, one being AddRec. 
define void @test_05(i32* %a, i32* %a_len_ptr, i1 %cond) { ; CHECK-LABEL: test_05 -; CHECK: mainloop: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK: br i1 true, label %in.bounds, label %out.of.bounds -; CHECK: loop.preloop: +; CHECK: entry: +; CHECK: br label %merge +; CHECK-NOT: mainloop entry: %len.a = load i32, i32* %a_len_ptr, !range !0 diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/LoopReroll/extra_instr.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/LoopReroll/extra_instr.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/LoopReroll/extra_instr.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/LoopReroll/extra_instr.ll 2021-07-09 07:05:02.000000000 +0000 @@ -0,0 +1,268 @@ +; RUN: opt -S -loop-reroll %s | FileCheck %s +target triple = "aarch64--linux-gnu" + +define void @rerollable1([2 x i32]* nocapture %a) { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv +; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4 +; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4 + + ; base instruction + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + + ; NO unrerollable instructions + + ; extra simple arithmetic operations, used by root instructions + %plus20 = add nuw nsw i64 %iv, 20 + %plus10 = add nuw nsw i64 %iv, 10 + + ; root instruction 0 + %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 + %value0 = load i32, i32* %ldptr0, align 4 + %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 + store i32 %value0, i32* %stptr0, align 4 + + ; root instruction 1 + %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 + %value1 = load i32, i32* %ldptr1, align 4 + %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 + store i32 %value1, i32* %stptr1, align 4 + + ; loop-increment + %iv.next = add nuw nsw i64 %iv, 1 + + ; latch + %exitcond = icmp eq i64 %iv.next, 5 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @unrerollable1([2 x i32]* nocapture %a) { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0 +; CHECK-NEXT: store i32 999, i32* %stptrx, align 4 + + ; base instruction + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + + ; unrerollable instructions using %iv + %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0 + store i32 999, i32* %stptrx, align 4 + + ; extra simple arithmetic operations, used by root instructions + %plus20 = add nuw nsw i64 %iv, 20 + %plus10 = add nuw nsw i64 %iv, 10 + + ; root instruction 0 + %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 + %value0 = load i32, i32* %ldptr0, align 4 + %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 + store i32 %value0, i32* %stptr0, align 4 + + ; root instruction 1 + %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 + %value1 = load i32, i32* %ldptr1, align 4 + %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 + store i32 %value1, i32* %stptr1, align 4 + + ; loop-increment + %iv.next 
= add nuw nsw i64 %iv, 1 + + ; latch + %exitcond = icmp eq i64 %iv.next, 5 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @unrerollable2([2 x i32]* nocapture %a) { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 +; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0 +; CHECK-NEXT: store i32 999, i32* %stptrx, align 4 + + ; base instruction + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + + ; loop-increment + %iv.next = add nuw nsw i64 %iv, 1 + + ; unrerollable instructions using %iv.next + %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0 + store i32 999, i32* %stptrx, align 4 + + ; extra simple arithmetic operations, used by root instructions + %plus20 = add nuw nsw i64 %iv, 20 + %plus10 = add nuw nsw i64 %iv, 10 + + ; root instruction 0 + %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 + %value0 = load i32, i32* %ldptr0, align 4 + %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 + store i32 %value0, i32* %stptr0, align 4 + + ; root instruction 1 + %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 + %value1 = load i32, i32* %ldptr1, align 4 + %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 + store i32 %value1, i32* %stptr1, align 4 + + ; latch + %exitcond = icmp eq i64 %iv.next, 5 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define dso_local void @rerollable2() { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}} +; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}} + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; NO unrerollable instructions + + ; root set 1 + + ; base instruction + %iv.scaled.div5 = udiv i32 %iv.scaled, 5 + tail call void @bar(i32 %iv.scaled.div5) + ; root instruction 0 + %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 + %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 + tail call void @bar(i32 %iv.scaled.add1.div5) + ; root instruction 2 + %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 + %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 + tail call void @bar(i32 %iv.scaled.add2.div5) + + ; root set 2 + + ; base instruction + %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 + %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 + tail call void @bar(i32 %iv.scaled.add4.div5) + ; root instruction 0 + %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 + %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 + tail call void @bar(i32 %iv.scaled.add5.div5) + ; root instruction 2 + %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 + %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 + tail call void @bar(i32 %iv.scaled.add6.div5) + + ; loop-increment + %iv.next = add nuw nsw i32 %iv, 1 + + ; latch + %cmp = icmp ult i32 %iv.next, 3 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +define dso_local void @unrerollable3() { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3 +; 
CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20 +; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7 +; CHECK-NEXT: tail call void @bar(i32 %iv.mul7) + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; unrerollable instructions using %iv + %iv.mul7 = mul nuw nsw i32 %iv, 7 + tail call void @bar(i32 %iv.mul7) + + ; root set 1 + + ; base instruction + %iv.scaled.div5 = udiv i32 %iv.scaled, 5 + tail call void @bar(i32 %iv.scaled.div5) + ; root instruction 0 + %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 + %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 + tail call void @bar(i32 %iv.scaled.add1.div5) + ; root instruction 2 + %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 + %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 + tail call void @bar(i32 %iv.scaled.add2.div5) + + ; root set 2 + + ; base instruction + %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 + %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 + tail call void @bar(i32 %iv.scaled.add4.div5) + ; root instruction 0 + %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 + %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 + tail call void @bar(i32 %iv.scaled.add5.div5) + ; root instruction 2 + %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 + %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 + tail call void @bar(i32 %iv.scaled.add6.div5) + + ; loop-increment + %iv.next = add nuw nsw i32 %iv, 1 + + ; latch + %cmp = icmp ult i32 %iv.next, 3 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +declare dso_local void @bar(i32) diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll 2020-10-16 21:13:10.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -; RUN: opt -loop-vectorize -S < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind uwtable -define void @Test(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p) #0 { -entry: - br label %for.body - -; CHECK-LABEL: @Test -; CHECK: <4 x i32> - -for.body: ; preds = %cond.end, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cond.end ] - %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !1 - %cmp1 = icmp eq i32 %0, 0 - %arrayidx3 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv - %1 = load i32, i32* %arrayidx3, align 4, !llvm.access.group !1 - br i1 %cmp1, label %cond.end, label %cond.false - -cond.false: ; preds = %for.body - %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv - %2 = load i32, i32* %arrayidx7, align 4, !llvm.access.group !1 - %add = add nsw i32 %2, %1 - br label %cond.end - -cond.end: ; preds = %for.body, %cond.false - %cond = phi i32 [ %add, %cond.false ], [ %1, %for.body ] - store i32 %cond, i32* %arrayidx3, align 4, !llvm.access.group !1 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 16 - br i1 %exitcond, label %for.end, label %for.body, 
!llvm.loop !0 - -for.end: ; preds = %cond.end - ret void -} - -attributes #0 = { norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" } - -!0 = distinct !{!0, !{!"llvm.loop.parallel_accesses", !1}} -!1 = distinct !{} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll 2020-10-16 21:13:10.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll 2021-07-09 07:05:02.000000000 +0000 @@ -51,7 +51,7 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !8 } -; Case2: With pragma assume_safety only the store is masked. +; Case2: With pragma assume_safety both, load and store are masked. ; void assume_safety(int * p, int * q1, int * q2, int guard) { ; #pragma clang loop vectorize(assume_safety) ; for(int ix=0; ix < 1021; ++ix) { @@ -63,7 +63,7 @@ ;CHECK-LABEL: @assume_safety ;CHECK: vector.body: -;CHECK-NOT: @llvm.masked.load +;CHECK: call <8 x i32> @llvm.masked.load ;CHECK: call void @llvm.masked.store ; Function Attrs: norecurse nounwind uwtable diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll 2020-10-16 21:13:10.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll 2021-07-09 07:05:02.000000000 +0000 @@ -112,3 +112,11 @@ ret i1 %res6 } + +@real_mode_blob_end = external dso_local global [0 x i8], align 1 +define i1 @global_array() { +; CHECK-LABEL: @global_array( +; CHECK-NEXT: ret i1 false + %1 = call i1 @llvm.is.constant.i64(i64 ptrtoint ([0 x i8]* @real_mode_blob_end to i64)) + ret i1 %1 +} diff -Nru llvm-toolchain-12-12.0.0/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll llvm-toolchain-12-12.0.1/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll --- llvm-toolchain-12-12.0.0/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-12-12.0.1/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll 2021-07-09 07:05:02.000000000 +0000 @@ -0,0 +1,854 @@ +; RUN: opt < %s -ipsccp -disable-output +; PR49582: This test checks for an iterator invalidation issue, which only gets +; exposed on a large-enough test case. We intentionally do not check the output. 
+ +@c = external dso_local global i32*, align 8 +@d = external dso_local global i32, align 4 + +define void @f(i32 %i) { +entry: + br label %for.cond + +for.cond: ; preds = %if.end628, %entry + %e.0 = phi i32 [ 1, %entry ], [ %e.15, %if.end628 ] + %cmp = icmp slt i32 %e.0, %i + call void @llvm.assume(i1 %cmp) + %0 = load i32*, i32** @c, align 8 + %tobool = icmp ne i32* %0, null + br i1 %tobool, label %if.then, label %if.end628 + +if.then: ; preds = %for.cond + %1 = load i32, i32* %0, align 4 + %tobool1 = icmp ne i32 %1, 0 + br i1 %tobool1, label %if.then2, label %if.else78 + +if.then2: ; preds = %if.then + %add = add nsw i32 %e.0, 1 + %cmp3 = icmp sge i32 %add, %i + br i1 %cmp3, label %if.then4, label %if.end + +if.then4: ; preds = %if.then2 + %idxprom = sext i32 %add to i64 + br label %if.end + +if.end: ; preds = %if.then4, %if.then2 + br i1 %cmp3, label %if.then9, label %if.end13 + +if.then9: ; preds = %if.end + %idxprom11 = sext i32 %add to i64 + br label %if.end13 + +if.end13: ; preds = %if.then9, %if.end + br i1 %cmp3, label %if.then16, label %if.end20 + +if.then16: ; preds = %if.end13 + %idxprom18 = sext i32 %add to i64 + br label %if.end20 + +if.end20: ; preds = %if.then16, %if.end13 + %add21 = add nsw i32 %e.0, 3 + %cmp22 = icmp sge i32 %add21, %i + br i1 %cmp22, label %if.then23, label %if.end25 + +if.then23: ; preds = %if.end20 + br label %if.end25 + +if.end25: ; preds = %if.then23, %if.end20 + %e.1 = phi i32 [ %add21, %if.then23 ], [ %e.0, %if.end20 ] + %cmp26 = icmp sge i32 %e.1, %i + br i1 %cmp26, label %if.then27, label %if.end28 + +if.then27: ; preds = %if.end25 + %inc = add nsw i32 %e.1, 1 + br label %if.end28 + +if.end28: ; preds = %if.then27, %if.end25 + %e.2 = phi i32 [ %inc, %if.then27 ], [ %e.1, %if.end25 ] + %add29 = add nsw i32 %e.2, 2 + %cmp30 = icmp sge i32 %add29, %i + br i1 %cmp30, label %if.then31, label %if.end33 + +if.then31: ; preds = %if.end28 + br label %if.end33 + +if.end33: ; preds = %if.then31, %if.end28 + %e.3 = phi i32 [ %add29, %if.then31 ], [ %e.2, %if.end28 ] + %cmp34 = icmp sge i32 %e.3, %i + br i1 %cmp34, label %if.then35, label %if.end38 + +if.then35: ; preds = %if.end33 + %idxprom36 = sext i32 %e.3 to i64 + br label %if.end38 + +if.end38: ; preds = %if.then35, %if.end33 + br i1 %cmp34, label %if.then40, label %if.end43 + +if.then40: ; preds = %if.end38 + %idxprom41 = sext i32 %e.3 to i64 + br label %if.end43 + +if.end43: ; preds = %if.then40, %if.end38 + br i1 %cmp34, label %if.then45, label %if.end47 + +if.then45: ; preds = %if.end43 + %inc46 = add nsw i32 %e.3, 1 + br label %if.end47 + +if.end47: ; preds = %if.then45, %if.end43 + %e.4 = phi i32 [ %inc46, %if.then45 ], [ %e.3, %if.end43 ] + %cmp48 = icmp sge i32 %e.4, %i + br i1 %cmp48, label %if.then49, label %if.end51 + +if.then49: ; preds = %if.end47 + %inc50 = add nsw i32 %e.4, 1 + br label %if.end51 + +if.end51: ; preds = %if.then49, %if.end47 + %e.5 = phi i32 [ %inc50, %if.then49 ], [ %e.4, %if.end47 ] + %2 = load i32*, i32** @c, align 8 + %tobool52 = icmp ne i32* %2, null + br i1 %tobool52, label %if.then53, label %if.else + +if.then53: ; preds = %if.end51 + %cmp54 = icmp sge i32 %e.5, %i + br i1 %cmp54, label %if.then55, label %if.end628 + +if.then55: ; preds = %if.then53 + unreachable + +if.else: ; preds = %if.end51 + %3 = load i32, i32* @d, align 4 + %tobool57 = icmp ne i32 %3, 0 + br i1 %tobool57, label %if.then58, label %if.else68 + +if.then58: ; preds = %if.else + %cmp59 = icmp sge i32 %e.5, %i + br i1 %cmp59, label %if.then60, label %if.end62 + +if.then60: ; preds = %if.then58 + 
%inc61 = add nsw i32 %e.5, 1 + br label %if.end62 + +if.end62: ; preds = %if.then60, %if.then58 + %e.6 = phi i32 [ %inc61, %if.then60 ], [ %e.5, %if.then58 ] + %add63 = add nsw i32 %e.6, 1 + %cmp64 = icmp sge i32 %add63, %i + br i1 %cmp64, label %if.then65, label %if.end628 + +if.then65: ; preds = %if.end62 + br label %if.end628 + +if.else68: ; preds = %if.else + %add69 = add nsw i32 %e.5, 2 + %cmp70 = icmp sge i32 %add69, %i + br i1 %cmp70, label %if.then71, label %if.end628 + +if.then71: ; preds = %if.else68 + %idxprom73 = sext i32 %add69 to i64 + br label %if.end628 + +if.else78: ; preds = %if.then + %call = call i32 @g() + %tobool79 = icmp ne i32 %call, 0 + br i1 %tobool79, label %if.then80, label %if.else123 + +if.then80: ; preds = %if.else78 + %add81 = add nsw i32 %e.0, 3 + %cmp82 = icmp sge i32 %add81, %i + br i1 %cmp82, label %if.then83, label %if.end87 + +if.then83: ; preds = %if.then80 + %idxprom85 = sext i32 %add81 to i64 + br label %if.end87 + +if.end87: ; preds = %if.then83, %if.then80 + br i1 %cmp82, label %if.then90, label %if.end94 + +if.then90: ; preds = %if.end87 + %idxprom92 = sext i32 %add81 to i64 + br label %if.end94 + +if.end94: ; preds = %if.then90, %if.end87 + br i1 %cmp82, label %if.then97, label %if.end99 + +if.then97: ; preds = %if.end94 + br label %if.end99 + +if.end99: ; preds = %if.then97, %if.end94 + %e.7 = phi i32 [ %add81, %if.then97 ], [ %e.0, %if.end94 ] + %cmp100 = icmp sge i32 %e.7, %i + br i1 %cmp100, label %if.then101, label %if.end103 + +if.then101: ; preds = %if.end99 + %inc102 = add nsw i32 %e.7, 1 + br label %if.end103 + +if.end103: ; preds = %if.then101, %if.end99 + %e.8 = phi i32 [ %inc102, %if.then101 ], [ %e.7, %if.end99 ] + %add104 = add nsw i32 %e.8, 1 + %cmp105 = icmp sge i32 %add104, %i + br i1 %cmp105, label %if.then106, label %if.end108 + +if.then106: ; preds = %if.end103 + br label %if.end108 + +if.end108: ; preds = %if.then106, %if.end103 + %e.9 = phi i32 [ %add104, %if.then106 ], [ %e.8, %if.end103 ] + %cmp109 = icmp sge i32 %e.9, %i + br i1 %cmp109, label %if.then110, label %if.end113 + +if.then110: ; preds = %if.end108 + %idxprom111 = sext i32 %e.9 to i64 + br label %if.end113 + +if.end113: ; preds = %if.then110, %if.end108 + br i1 %cmp109, label %if.then115, label %if.end118 + +if.then115: ; preds = %if.end113 + %idxprom116 = sext i32 %e.9 to i64 + unreachable + +if.end118: ; preds = %if.end113 + br i1 %cmp109, label %if.then120, label %if.end628 + +if.then120: ; preds = %if.end118 + br label %if.end628 + +if.else123: ; preds = %if.else78 + %call124 = call i32 @g() + %tobool125 = icmp ne i32 %call124, 0 + br i1 %tobool125, label %if.then126, label %if.end628 + +if.then126: ; preds = %if.else123 + %call127 = call i32 @g() + %tobool128 = icmp ne i32 %call127, 0 + br i1 %tobool128, label %if.then129, label %if.else164 + +if.then129: ; preds = %if.then126 + %add130 = add nsw i32 %e.0, 1 + %cmp131 = icmp sge i32 %add130, %i + br i1 %cmp131, label %if.then132, label %if.end134 + +if.then132: ; preds = %if.then129 + br label %if.end134 + +if.end134: ; preds = %if.then132, %if.then129 + %e.10 = phi i32 [ %add130, %if.then132 ], [ %e.0, %if.then129 ] + %cmp135 = icmp sge i32 %e.10, %i + br i1 %cmp135, label %if.then136, label %if.end139 + +if.then136: ; preds = %if.end134 + %idxprom137 = sext i32 %e.10 to i64 + br label %if.end139 + +if.end139: ; preds = %if.then136, %if.end134 + br i1 %cmp135, label %if.then141, label %if.end144 + +if.then141: ; preds = %if.end139 + %idxprom142 = sext i32 %e.10 to i64 + br label %if.end144 + +if.end144: ; 
preds = %if.then141, %if.end139 + br i1 %cmp135, label %if.then146, label %if.end149 + +if.then146: ; preds = %if.end144 + %idxprom147 = sext i32 %e.10 to i64 + br label %if.end149 + +if.end149: ; preds = %if.then146, %if.end144 + br i1 %cmp135, label %if.then151, label %if.else154 + +if.then151: ; preds = %if.end149 + %idxprom152 = sext i32 %e.10 to i64 + br label %if.end160 + +if.else154: ; preds = %if.end149 + %idxprom157 = sext i32 %e.10 to i64 + br label %if.end160 + +if.end160: ; preds = %if.else154, %if.then151 + br i1 %cmp135, label %if.then162, label %if.end628 + +if.then162: ; preds = %if.end160 + unreachable + +if.else164: ; preds = %if.then126 + %4 = load i32*, i32** @c, align 8 + %tobool165 = icmp ne i32* %4, null + br i1 %tobool165, label %if.then166, label %if.else195 + +if.then166: ; preds = %if.else164 + %add167 = add nsw i32 %e.0, 1 + %cmp168 = icmp sge i32 %add167, %i + br i1 %cmp168, label %if.then169, label %if.end173 + +if.then169: ; preds = %if.then166 + %idxprom171 = sext i32 %add167 to i64 + br label %if.end173 + +if.end173: ; preds = %if.then169, %if.then166 + br i1 %cmp168, label %if.then176, label %if.end180 + +if.then176: ; preds = %if.end173 + %idxprom178 = sext i32 %add167 to i64 + unreachable + +if.end180: ; preds = %if.end173 + br i1 %cmp168, label %if.then183, label %if.end187 + +if.then183: ; preds = %if.end180 + %idxprom185 = sext i32 %add167 to i64 + unreachable + +if.end187: ; preds = %if.end180 + br i1 %cmp168, label %if.then190, label %if.end628 + +if.then190: ; preds = %if.end187 + br label %if.end628 + +if.else195: ; preds = %if.else164 + %5 = load i32, i32* @d, align 4 + %tobool196 = icmp ne i32 %5, 0 + br i1 %tobool196, label %if.then197, label %if.else205 + +if.then197: ; preds = %if.else195 + %add198 = add nsw i32 %e.0, 1 + %cmp199 = icmp sge i32 %add198, %i + br i1 %cmp199, label %if.then200, label %if.end628 + +if.then200: ; preds = %if.then197 + %idxprom202 = sext i32 %add198 to i64 + br label %if.end628 + +if.else205: ; preds = %if.else195 + %call206 = call i32 @h() + %tobool207 = icmp ne i32 %call206, 0 + br i1 %tobool207, label %if.then208, label %if.else217 + +if.then208: ; preds = %if.else205 + %add209 = add nsw i32 %e.0, 1 + %cmp210 = icmp sge i32 %add209, %i + br i1 %cmp210, label %if.then211, label %if.end215 + +if.then211: ; preds = %if.then208 + %idxprom213 = sext i32 %add209 to i64 + unreachable + +if.end215: ; preds = %if.then208 + %6 = zext i32 %add209 to i64 + br label %if.end628 + +if.else217: ; preds = %if.else205 + %7 = load i32*, i32** @c, align 8 + %tobool218 = icmp ne i32* %7, null + br i1 %tobool218, label %if.then219, label %if.else227 + +if.then219: ; preds = %if.else217 + %add220 = add nsw i32 %e.0, 1 + %cmp221 = icmp sge i32 %add220, %i + br i1 %cmp221, label %if.then222, label %if.end628 + +if.then222: ; preds = %if.then219 + %idxprom224 = sext i32 %add220 to i64 + br label %if.end628 + +if.else227: ; preds = %if.else217 + %call228 = call i32 @g() + %tobool229 = icmp ne i32 %call228, 0 + br i1 %tobool229, label %if.then230, label %if.else245 + +if.then230: ; preds = %if.else227 + %add231 = add nsw i32 %e.0, 1 + %cmp232 = icmp sge i32 %add231, %i + br i1 %cmp232, label %if.then233, label %if.end237 + +if.then233: ; preds = %if.then230 + %idxprom235 = sext i32 %add231 to i64 + br label %if.end237 + +if.end237: ; preds = %if.then233, %if.then230 + br i1 %cmp232, label %if.then240, label %if.end628 + +if.then240: ; preds = %if.end237 + %idxprom242 = sext i32 %add231 to i64 + br label %if.end628 + +if.else245: ; preds = 
%if.else227 + %8 = load i32*, i32** @c, align 8 + %tobool246 = icmp ne i32* %8, null + br i1 %tobool246, label %if.then247, label %if.else258 + +if.then247: ; preds = %if.else245 + %add248 = add nsw i32 %e.0, 1 + %cmp249 = icmp sge i32 %add248, %i + br i1 %cmp249, label %if.then250, label %if.end254 + +if.then250: ; preds = %if.then247 + %idxprom252 = sext i32 %add248 to i64 + unreachable + +if.end254: ; preds = %if.then247 + %9 = zext i32 %add248 to i64 + br label %if.end628 + +if.else258: ; preds = %if.else245 + %10 = load i32, i32* @d, align 4 + %tobool259 = icmp ne i32 %10, 0 + br i1 %tobool259, label %if.then260, label %if.else268 + +if.then260: ; preds = %if.else258 + %add261 = add nsw i32 %e.0, 1 + %cmp262 = icmp sge i32 %add261, %i + br i1 %cmp262, label %if.then263, label %if.end628 + +if.then263: ; preds = %if.then260 + %idxprom265 = sext i32 %add261 to i64 + br label %if.end628 + +if.else268: ; preds = %if.else258 + %call269 = call i32 @h() + %tobool270 = icmp ne i32 %call269, 0 + br i1 %tobool270, label %if.then271, label %if.else279 + +if.then271: ; preds = %if.else268 + %add272 = add nsw i32 %e.0, 1 + %cmp273 = icmp sge i32 %add272, %i + br i1 %cmp273, label %if.then274, label %if.end628 + +if.then274: ; preds = %if.then271 + %idxprom276 = sext i32 %add272 to i64 + br label %if.end628 + +if.else279: ; preds = %if.else268 + %11 = load i32*, i32** @c, align 8 + %tobool280 = icmp ne i32* %11, null + br i1 %tobool280, label %if.then281, label %if.else287 + +if.then281: ; preds = %if.else279 + %add282 = add nsw i32 %e.0, 2 + %cmp283 = icmp sge i32 %add282, %i + br i1 %cmp283, label %if.then284, label %if.end628 + +if.then284: ; preds = %if.then281 + br label %if.end628 + +if.else287: ; preds = %if.else279 + %call288 = call i32 @g() + %tobool289 = icmp ne i32 %call288, 0 + br i1 %tobool289, label %if.then290, label %if.else307 + +if.then290: ; preds = %if.else287 + %12 = load i32*, i32** @c, align 8 + %tobool291 = icmp ne i32* %12, null + br i1 %tobool291, label %if.then292, label %if.else298 + +if.then292: ; preds = %if.then290 + %add293 = add nsw i32 %e.0, 3 + %cmp294 = icmp sge i32 %add293, %i + br i1 %cmp294, label %if.then295, label %if.end628 + +if.then295: ; preds = %if.then292 + br label %if.end628 + +if.else298: ; preds = %if.then290 + %add299 = add nsw i32 %e.0, 4 + %cmp300 = icmp sge i32 %add299, %i + br i1 %cmp300, label %if.then301, label %if.end628 + +if.then301: ; preds = %if.else298 + %idxprom303 = sext i32 %add299 to i64 + br label %if.end628 + +if.else307: ; preds = %if.else287 + %13 = load i32*, i32** @c, align 8 + %tobool308 = icmp ne i32* %13, null + br i1 %tobool308, label %if.then309, label %if.else324 + +if.then309: ; preds = %if.else307 + %add310 = add nsw i32 %e.0, 1 + %cmp311 = icmp sge i32 %add310, %i + br i1 %cmp311, label %if.then312, label %if.else316 + +if.then312: ; preds = %if.then309 + %idxprom314 = sext i32 %add310 to i64 + br label %if.end628 + +if.else316: ; preds = %if.then309 + br i1 undef, label %if.then318, label %if.end628 + +if.then318: ; preds = %if.else316 + %idxprom320 = sext i32 %add310 to i64 + br label %if.end628 + +if.else324: ; preds = %if.else307 + %call325 = call i32 @g() + %tobool326 = icmp ne i32 %call325, 0 + br i1 %tobool326, label %if.then327, label %if.else475 + +if.then327: ; preds = %if.else324 + %add328 = add nsw i32 %e.0, 2 + %cmp329 = icmp sge i32 %add328, %i + br i1 %cmp329, label %if.then330, label %if.end332 + +if.then330: ; preds = %if.then327 + br label %if.end332 + +if.end332: ; preds = %if.then330, %if.then327 
+ %e.11 = phi i32 [ %add328, %if.then330 ], [ %e.0, %if.then327 ] + %cmp333 = icmp sge i32 %e.11, %i + br i1 %cmp333, label %if.then334, label %if.end336 + +if.then334: ; preds = %if.end332 + %inc335 = add nsw i32 %e.11, 1 + br label %if.end336 + +if.end336: ; preds = %if.then334, %if.end332 + %e.12 = phi i32 [ %inc335, %if.then334 ], [ %e.11, %if.end332 ] + %cmp337 = icmp sge i32 %e.12, %i + br i1 %cmp337, label %if.then338, label %if.end340 + +if.then338: ; preds = %if.end336 + %inc339 = add nsw i32 %e.12, 1 + br label %if.end340 + +if.end340: ; preds = %if.then338, %if.end336 + %e.13 = phi i32 [ %inc339, %if.then338 ], [ %e.12, %if.end336 ] + %cmp341 = icmp sge i32 %e.13, %i + br i1 %cmp341, label %if.then342, label %if.end344 + +if.then342: ; preds = %if.end340 + %inc343 = add nsw i32 %e.13, 1 + br label %if.end344 + +if.end344: ; preds = %if.then342, %if.end340 + %e.14 = phi i32 [ %inc343, %if.then342 ], [ %e.13, %if.end340 ] + %call345 = call i32 @g() + %tobool346 = icmp ne i32 %call345, 0 + br i1 %tobool346, label %if.then347, label %if.else398 + +if.then347: ; preds = %if.end344 + %cmp348 = icmp sge i32 %e.14, %i + br i1 %cmp348, label %if.then349, label %if.end352 + +if.then349: ; preds = %if.then347 + %idxprom350 = sext i32 %e.14 to i64 + br label %if.end352 + +if.end352: ; preds = %if.then349, %if.then347 + br i1 %cmp348, label %if.then354, label %if.else357 + +if.then354: ; preds = %if.end352 + %idxprom355 = sext i32 %e.14 to i64 + br label %if.end361 + +if.else357: ; preds = %if.end352 + %idxprom359 = sext i32 %e.14 to i64 + br label %if.end361 + +if.end361: ; preds = %if.else357, %if.then354 + br i1 %cmp348, label %if.then363, label %if.end366 + +if.then363: ; preds = %if.end361 + %idxprom364 = sext i32 %e.14 to i64 + br label %if.end366 + +if.end366: ; preds = %if.then363, %if.end361 + br i1 %cmp348, label %if.then368, label %if.end371 + +if.then368: ; preds = %if.end366 + %idxprom369 = sext i32 %e.14 to i64 + br label %if.end371 + +if.end371: ; preds = %if.then368, %if.end366 + br i1 %cmp348, label %if.then373, label %if.end376 + +if.then373: ; preds = %if.end371 + %idxprom374 = sext i32 %e.14 to i64 + br label %if.end376 + +if.end376: ; preds = %if.then373, %if.end371 + br i1 %cmp348, label %if.then378, label %if.end381 + +if.then378: ; preds = %if.end376 + %idxprom379 = sext i32 %e.14 to i64 + br label %if.end381 + +if.end381: ; preds = %if.then378, %if.end376 + br i1 %cmp348, label %if.then383, label %if.else386 + +if.then383: ; preds = %if.end381 + %idxprom384 = sext i32 %e.14 to i64 + br label %if.end390 + +if.else386: ; preds = %if.end381 + %idxprom388 = sext i32 %e.14 to i64 + br label %if.end390 + +if.end390: ; preds = %if.else386, %if.then383 + %add391 = add nsw i32 %e.14, 1 + %cmp392 = icmp sge i32 %add391, %i + br i1 %cmp392, label %if.then393, label %if.end628 + +if.then393: ; preds = %if.end390 + %idxprom395 = sext i32 %add391 to i64 + br label %if.end628 + +if.else398: ; preds = %if.end344 + %call399 = call i32 @h() + %tobool400 = icmp ne i32 %call399, 0 + br i1 %tobool400, label %if.then401, label %if.else409 + +if.then401: ; preds = %if.else398 + %add402 = add nsw i32 %e.14, 1 + %cmp403 = icmp sge i32 %add402, %i + br i1 %cmp403, label %if.then404, label %if.end628 + +if.then404: ; preds = %if.then401 + %idxprom406 = sext i32 %add402 to i64 + br label %if.end628 + +if.else409: ; preds = %if.else398 + %call410 = call i32 @h() + %tobool411 = icmp ne i32 %call410, 0 + br i1 %tobool411, label %if.then412, label %if.else420 + +if.then412: ; preds = %if.else409 + 
%add413 = add nsw i32 %e.14, 1 + %cmp414 = icmp sge i32 %add413, %i + br i1 %cmp414, label %if.then415, label %if.end628 + +if.then415: ; preds = %if.then412 + %idxprom417 = sext i32 %add413 to i64 + br label %if.end628 + +if.else420: ; preds = %if.else409 + %call421 = call i32 @h() + %tobool422 = icmp ne i32 %call421, 0 + br i1 %tobool422, label %if.then423, label %if.else431 + +if.then423: ; preds = %if.else420 + %add424 = add nsw i32 %e.14, 3 + %cmp425 = icmp sge i32 %add424, %i + br i1 %cmp425, label %if.then426, label %if.end628 + +if.then426: ; preds = %if.then423 + %idxprom428 = sext i32 %add424 to i64 + br label %if.end628 + +if.else431: ; preds = %if.else420 + %call432 = call i32 @h() + %tobool433 = icmp ne i32 %call432, 0 + br i1 %tobool433, label %if.then434, label %if.else440 + +if.then434: ; preds = %if.else431 + %add435 = add nsw i32 %e.14, 1 + %cmp436 = icmp sge i32 %add435, %i + br i1 %cmp436, label %if.then437, label %if.end628 + +if.then437: ; preds = %if.then434 + br label %if.end628 + +if.else440: ; preds = %if.else431 + %call441 = call i32 @h() + %tobool442 = icmp ne i32 %call441, 0 + br i1 %tobool442, label %if.then443, label %if.else451 + +if.then443: ; preds = %if.else440 + %tobool444 = icmp ne i32 %e.14, 0 + br i1 %tobool444, label %if.then445, label %if.end628 + +if.then445: ; preds = %if.then443 + %cmp446 = icmp sge i32 %e.14, %i + br i1 %cmp446, label %if.then447, label %if.end628 + +if.then447: ; preds = %if.then445 + br label %if.end628 + +if.else451: ; preds = %if.else440 + %call452 = call i32 @h() + %tobool453 = icmp ne i32 %call452, 0 + br i1 %tobool453, label %if.then454, label %if.else460 + +if.then454: ; preds = %if.else451 + %add455 = add nsw i32 %e.14, 1 + %cmp456 = icmp sge i32 %add455, %i + br i1 %cmp456, label %if.then457, label %if.end628 + +if.then457: ; preds = %if.then454 + br label %if.end628 + +if.else460: ; preds = %if.else451 + %add461 = add nsw i32 %e.14, 2 + %cmp462 = icmp sge i32 %add461, %i + br i1 %cmp462, label %if.then463, label %if.end628 + +if.then463: ; preds = %if.else460 + %idxprom465 = sext i32 %add461 to i64 + br label %if.end628 + +if.else475: ; preds = %if.else324 + %call476 = call i32 @g() + %tobool477 = icmp ne i32 %call476, 0 + br i1 %tobool477, label %if.then478, label %if.else509 + +if.then478: ; preds = %if.else475 + %call479 = call i32 @h() + %tobool480 = icmp ne i32 %call479, 0 + br i1 %tobool480, label %if.then481, label %if.else487 + +if.then481: ; preds = %if.then478 + %add482 = add nsw i32 %e.0, 1 + %cmp483 = icmp sge i32 %add482, %i + br i1 %cmp483, label %if.then484, label %if.end628 + +if.then484: ; preds = %if.then481 + br label %if.end628 + +if.else487: ; preds = %if.then478 + %call488 = call i32 @h() + %tobool489 = icmp ne i32 %call488, 0 + br i1 %tobool489, label %if.then490, label %if.else496 + +if.then490: ; preds = %if.else487 + %add491 = add nsw i32 %e.0, 1 + %cmp492 = icmp sge i32 %add491, %i + br i1 %cmp492, label %if.then493, label %if.end628 + +if.then493: ; preds = %if.then490 + br label %if.end628 + +if.else496: ; preds = %if.else487 + %add497 = add nsw i32 %e.0, 1 + %cmp498 = icmp sge i32 %add497, %i + br i1 %cmp498, label %if.then499, label %if.else501 + +if.then499: ; preds = %if.else496 + br label %if.end628 + +if.else501: ; preds = %if.else496 + br i1 undef, label %if.then503, label %if.end628 + +if.then503: ; preds = %if.else501 + br label %if.end628 + +if.else509: ; preds = %if.else475 + %call510 = call i32 @g() + %tobool511 = icmp ne i32 %call510, 0 + br i1 %tobool511, label %if.then512, 
label %if.else565 + +if.then512: ; preds = %if.else509 + %add513 = add nsw i32 %e.0, 1 + %cmp514 = icmp sge i32 %add513, %i + br i1 %cmp514, label %if.then515, label %if.end519 + +if.then515: ; preds = %if.then512 + %idxprom517 = sext i32 %add513 to i64 + br label %if.end519 + +if.end519: ; preds = %if.then515, %if.then512 + br i1 %cmp514, label %if.then522, label %if.end526 + +if.then522: ; preds = %if.end519 + %idxprom524 = sext i32 %add513 to i64 + br label %if.end526 + +if.end526: ; preds = %if.then522, %if.end519 + br i1 %cmp514, label %if.then529, label %if.end533 + +if.then529: ; preds = %if.end526 + %idxprom531 = sext i32 %add513 to i64 + br label %if.end533 + +if.end533: ; preds = %if.then529, %if.end526 + %add534 = add nsw i32 %e.0, 2 + %cmp535 = icmp sge i32 %add534, %i + br i1 %cmp535, label %if.then536, label %if.end540 + +if.then536: ; preds = %if.end533 + %idxprom538 = sext i32 %add534 to i64 + br label %if.end540 + +if.end540: ; preds = %if.then536, %if.end533 + br i1 %cmp535, label %if.then543, label %if.end547 + +if.then543: ; preds = %if.end540 + %idxprom545 = sext i32 %add534 to i64 + unreachable + +if.end547: ; preds = %if.end540 + br i1 %cmp514, label %if.then550, label %if.else554 + +if.then550: ; preds = %if.end547 + %idxprom552 = sext i32 %add513 to i64 + br label %if.end559 + +if.else554: ; preds = %if.end547 + %idxprom557 = sext i32 %add513 to i64 + br label %if.end559 + +if.end559: ; preds = %if.else554, %if.then550 + br i1 %cmp514, label %if.then562, label %if.end628 + +if.then562: ; preds = %if.end559 + br label %if.end628 + +if.else565: ; preds = %if.else509 + %call566 = call i32 @g() + %tobool567 = icmp ne i32 %call566, 0 + br i1 %tobool567, label %if.then568, label %if.else590 + +if.then568: ; preds = %if.else565 + %add569 = add nsw i32 %e.0, 2 + %cmp570 = icmp sge i32 %add569, %i + br i1 %cmp570, label %if.then571, label %if.else575 + +if.then571: ; preds = %if.then568 + %idxprom573 = sext i32 %add569 to i64 + br label %if.end582 + +if.else575: ; preds = %if.then568 + %idxprom579 = sext i32 %add569 to i64 + br label %if.end582 + +if.end582: ; preds = %if.else575, %if.then571 + %add583 = add nsw i32 %e.0, 1 + %cmp584 = icmp sge i32 %add583, %i + br i1 %cmp584, label %if.then585, label %if.end628 + +if.then585: ; preds = %if.end582 + %idxprom587 = sext i32 %add583 to i64 + br label %if.end628 + +if.else590: ; preds = %if.else565 + %call591 = call i32 @g() + %tobool592 = icmp ne i32 %call591, 0 + br i1 %tobool592, label %if.then593, label %if.end628 + +if.then593: ; preds = %if.else590 + %add594 = add nsw i32 %e.0, 1 + %cmp595 = icmp sge i32 %add594, %i + br i1 %cmp595, label %if.then596, label %if.else600 + +if.then596: ; preds = %if.then593 + %idxprom598 = sext i32 %add594 to i64 + br label %if.end628 + +if.else600: ; preds = %if.then593 + br i1 undef, label %if.then602, label %if.end628 + +if.then602: ; preds = %if.else600 + %idxprom604 = sext i32 %add594 to i64 + br label %if.end628 + +if.end628: ; preds = %if.then602, %if.else600, %if.then596, %if.else590, %if.then585, %if.end582, %if.then562, %if.end559, %if.then503, %if.else501, %if.then499, %if.then493, %if.then490, %if.then484, %if.then481, %if.then463, %if.else460, %if.then457, %if.then454, %if.then447, %if.then445, %if.then443, %if.then437, %if.then434, %if.then426, %if.then423, %if.then415, %if.then412, %if.then404, %if.then401, %if.then393, %if.end390, %if.then318, %if.else316, %if.then312, %if.then301, %if.else298, %if.then295, %if.then292, %if.then284, %if.then281, %if.then274, %if.then271, 
%if.then263, %if.then260, %if.end254, %if.then240, %if.end237, %if.then222, %if.then219, %if.end215, %if.then200, %if.then197, %if.then190, %if.end187, %if.end160, %if.else123, %if.then120, %if.end118, %if.then71, %if.else68, %if.then65, %if.end62, %if.then53, %for.cond
+  %e.15 = phi i32 [ %e.5, %if.then53 ], [ %add63, %if.then65 ], [ %e.6, %if.end62 ], [ %e.5, %if.then71 ], [ %e.5, %if.else68 ], [ %e.9, %if.then120 ], [ %e.9, %if.end118 ], [ %e.10, %if.end160 ], [ %e.0, %if.then190 ], [ %e.0, %if.end187 ], [ %e.0, %if.then200 ], [ %e.0, %if.then197 ], [ %e.0, %if.end215 ], [ %e.0, %if.then222 ], [ %e.0, %if.then219 ], [ %e.0, %if.then240 ], [ %e.0, %if.end237 ], [ %e.0, %if.end254 ], [ %e.0, %if.then263 ], [ %e.0, %if.then260 ], [ %e.0, %if.then274 ], [ %e.0, %if.then271 ], [ %add282, %if.then284 ], [ %e.0, %if.then281 ], [ %add293, %if.then295 ], [ %e.0, %if.then292 ], [ %e.0, %if.then301 ], [ %e.0, %if.else298 ], [ %e.0, %if.then312 ], [ %e.0, %if.then318 ], [ %e.0, %if.else316 ], [ %e.14, %if.then393 ], [ %e.14, %if.end390 ], [ %e.14, %if.then404 ], [ %e.14, %if.then401 ], [ %e.14, %if.then415 ], [ %e.14, %if.then412 ], [ %e.14, %if.then426 ], [ %e.14, %if.then423 ], [ %add435, %if.then437 ], [ %e.14, %if.then434 ], [ %e.14, %if.then447 ], [ %e.14, %if.then445 ], [ %e.14, %if.then443 ], [ %add455, %if.then457 ], [ %e.14, %if.then454 ], [ %e.14, %if.then463 ], [ %e.14, %if.else460 ], [ %add482, %if.then484 ], [ %e.0, %if.then481 ], [ %add491, %if.then493 ], [ %e.0, %if.then490 ], [ %add497, %if.then499 ], [ %add497, %if.then503 ], [ %e.0, %if.else501 ], [ %add513, %if.then562 ], [ %e.0, %if.end559 ], [ %e.0, %if.then585 ], [ %e.0, %if.end582 ], [ %e.0, %if.then596 ], [ %e.0, %if.then602 ], [ %e.0, %if.else600 ], [ %e.0, %if.else590 ], [ %e.0, %if.else123 ], [ %e.0, %for.cond ]
+  br label %for.cond
+}
+
+declare i32 @g()
+
+declare i32 @h()
+
+; Function Attrs: nofree nosync nounwind willreturn
+declare void @llvm.assume(i1 noundef)
+
diff -Nru llvm-toolchain-12-12.0.0/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp llvm-toolchain-12-12.0.1/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp
--- llvm-toolchain-12-12.0.0/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp 2021-02-17 08:14:30.000000000 +0000
+++ llvm-toolchain-12-12.0.1/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp 2021-07-09 07:05:03.000000000 +0000
@@ -100,11 +100,7 @@
   for (const auto &FCS : Summaries.drop_front()) {
     Summary.RegionCoverage.merge(FCS.RegionCoverage);
     Summary.LineCoverage.merge(FCS.LineCoverage);
-
-    // Sum branch coverage across instantiation groups for the summary rather
-    // than "merge" the maximum count. This is a clearer view into whether all
-    // created branches are covered.
-    Summary.BranchCoverage += FCS.BranchCoverage;
+    Summary.BranchCoverage.merge(FCS.BranchCoverage);
   }
   return Summary;
 }
diff -Nru llvm-toolchain-12-12.0.0/llvm/tools/llvm-cov/CoverageSummaryInfo.h llvm-toolchain-12-12.0.1/llvm/tools/llvm-cov/CoverageSummaryInfo.h
--- llvm-toolchain-12-12.0.0/llvm/tools/llvm-cov/CoverageSummaryInfo.h 2021-02-17 08:14:30.000000000 +0000
+++ llvm-toolchain-12-12.0.1/llvm/tools/llvm-cov/CoverageSummaryInfo.h 2021-07-09 07:05:03.000000000 +0000
@@ -123,6 +123,11 @@
     return *this;
   }
 
+  void merge(const BranchCoverageInfo &RHS) {
+    Covered = std::max(Covered, RHS.Covered);
+    NumBranches = std::max(NumBranches, RHS.NumBranches);
+  }
+
   size_t getCovered() const { return Covered; }
 
   size_t getNumBranches() const { return NumBranches; }
diff -Nru llvm-toolchain-12-12.0.0/llvm/tools/llvm-exegesis/lib/X86/Target.cpp llvm-toolchain-12-12.0.1/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
--- llvm-toolchain-12-12.0.0/llvm/tools/llvm-exegesis/lib/X86/Target.cpp 2021-04-15 05:53:50.000000000 +0000
+++ llvm-toolchain-12-12.0.1/llvm/tools/llvm-exegesis/lib/X86/Target.cpp 2021-07-09 07:09:49.000000000 +0000
@@ -22,6 +22,7 @@
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Host.h"
 
 #include
 #include
@@ -727,13 +728,25 @@
 #if defined(__linux__) && defined(HAVE_LIBPFM) && \
     defined(LIBPFM_HAS_FIELD_CYCLES)
-  // If the kernel supports it, the hardware still may not have it.
-  return X86LbrCounter::checkLbrSupport();
+  // FIXME: Fix this.
+  // https://bugs.llvm.org/show_bug.cgi?id=48918
+  // For now, only do the check if we see an Intel machine because
+  // the counter uses some intel-specific magic and it could
+  // be confused and think an AMD machine actually has LBR support.
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
+    defined(_M_X64)
+  using namespace sys::detail::x86;
+
+  if (getVendorSignature() == VendorSignatures::GENUINE_INTEL)
+    // If the kernel supports it, the hardware still may not have it.
+    return X86LbrCounter::checkLbrSupport();
 #else
+  llvm_unreachable("Running X86 exegesis on non-X86 target");
+#endif
+#endif
   return llvm::make_error(
       "LBR not supported on this kernel and/or platform",
       llvm::errc::not_supported);
-#endif
 }
 
   std::unique_ptr withSavedState() const override {
diff -Nru llvm-toolchain-12-12.0.0/llvm/utils/gn/secondary/llvm/version.gni llvm-toolchain-12-12.0.1/llvm/utils/gn/secondary/llvm/version.gni
--- llvm-toolchain-12-12.0.0/llvm/utils/gn/secondary/llvm/version.gni 2021-04-15 05:53:50.000000000 +0000
+++ llvm-toolchain-12-12.0.1/llvm/utils/gn/secondary/llvm/version.gni 2021-07-09 07:09:49.000000000 +0000
@@ -1,4 +1,4 @@
 llvm_version_major = 12
 llvm_version_minor = 0
-llvm_version_patch = 0
+llvm_version_patch = 1
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff -Nru llvm-toolchain-12-12.0.0/llvm/utils/lit/lit/__init__.py llvm-toolchain-12-12.0.1/llvm/utils/lit/lit/__init__.py
--- llvm-toolchain-12-12.0.0/llvm/utils/lit/lit/__init__.py 2021-04-15 05:53:50.000000000 +0000
+++ llvm-toolchain-12-12.0.1/llvm/utils/lit/lit/__init__.py 2021-07-09 07:09:49.000000000 +0000
@@ -2,7 +2,7 @@
 
 __author__ = 'Daniel Dunbar'
 __email__ = 'daniel@minormatter.com'
-__versioninfo__ = (12, 0, 0)
+__versioninfo__ = (12, 0, 1)
 __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
 
 __all__ = []
diff -Nru llvm-toolchain-12-12.0.0/llvm/utils/lit/lit/TestingConfig.py llvm-toolchain-12-12.0.1/llvm/utils/lit/lit/TestingConfig.py
--- llvm-toolchain-12-12.0.0/llvm/utils/lit/lit/TestingConfig.py 2021-04-15 05:53:50.000000000 +0000
+++ llvm-toolchain-12-12.0.1/llvm/utils/lit/lit/TestingConfig.py 2021-07-09 07:09:49.000000000 +0000
@@ -33,6 +33,7 @@
             pass_vars.append('INCLUDE')
             pass_vars.append('LIB')
             pass_vars.append('PATHEXT')
+            pass_vars.append('USERPROFILE')
         environment['PYTHONBUFFERED'] = '1'
 
         for var in pass_vars:
diff -Nru llvm-toolchain-12-12.0.0/llvm/utils/release/build_llvm_package.bat llvm-toolchain-12-12.0.1/llvm/utils/release/build_llvm_package.bat
--- llvm-toolchain-12-12.0.0/llvm/utils/release/build_llvm_package.bat 2021-04-15 05:53:50.000000000 +0000
+++ llvm-toolchain-12-12.0.1/llvm/utils/release/build_llvm_package.bat 2021-07-09 07:09:49.000000000 +0000
@@ -27,8 +27,8 @@
 for /f "usebackq" %%i in (`PowerShell ^(Get-Date^).ToString^('yyyyMMdd'^)`) do set datestamp=%%i
 
 set revision=%1
-set package_version=12.0.0-%revision:~0,8%
-set clang_format_vs_version=12.0.0.%datestamp%
+set package_version=12.0.1-%revision:~0,8%
+set clang_format_vs_version=12.0.1.%datestamp%
 set build_dir=llvm_package_%revision:~0,8%
 
 echo Revision: %revision%
diff -Nru llvm-toolchain-12-12.0.0/openmp/libomptarget/src/interface.cpp llvm-toolchain-12-12.0.1/openmp/libomptarget/src/interface.cpp
--- llvm-toolchain-12-12.0.0/openmp/libomptarget/src/interface.cpp 2021-04-15 05:53:50.000000000 +0000
+++ llvm-toolchain-12-12.0.1/openmp/libomptarget/src/interface.cpp 2021-07-09 07:09:49.000000000 +0000
@@ -61,7 +61,7 @@
     for (auto &Device : PM->Devices)
       dumpTargetPointerMappings(loc, Device);
   else
-    FAILURE_MESSAGE("Run with LIBOMPTARGET_DEBUG=%d to dump host-target "
+    FAILURE_MESSAGE("Run with LIBOMPTARGET_INFO=%d to dump host-target "
                     "pointer mappings.\n",
                     OMP_INFOTYPE_DUMP_TABLE);
 
diff -Nru llvm-toolchain-12-12.0.0/polly/lib/External/ppcg/external.c llvm-toolchain-12-12.0.1/polly/lib/External/ppcg/external.c
--- llvm-toolchain-12-12.0.0/polly/lib/External/ppcg/external.c 2020-10-16 21:13:11.000000000 +0000
+++
llvm-toolchain-12-12.0.1/polly/lib/External/ppcg/external.c 2021-07-09 07:05:03.000000000 +0000 @@ -1,181 +1,192 @@ -#include "assert.h" -#include "stdio.h" -#include "stdlib.h" +#include +#include +#include +#include +#include "cpu.h" +#include "opencl.h" + #define die() { \ fprintf(stderr, "Dummy function %s called\n", __FUNCTION__); \ abort(); \ } -void pet_scop_compute_outer_to_any(){ +__isl_give isl_union_map *pet_scop_compute_outer_to_any( + __isl_keep pet_scop *scop) { die(); } -void pet_scop_compute_outer_to_inner(){ +__isl_give isl_union_map *pet_scop_compute_outer_to_inner( + __isl_keep pet_scop *scop) { die(); } -void pet_tree_get_type(){ +enum pet_tree_type pet_tree_get_type(__isl_keep pet_tree *tree) { die(); } -void pet_tree_foreach_access_expr(){ +int pet_tree_foreach_access_expr(__isl_keep pet_tree *tree, + int (*fn)(__isl_keep pet_expr *expr, void *user), void *user) { die(); } -void pet_expr_get_ctx(){ +isl_ctx *pet_expr_get_ctx(__isl_keep pet_expr *expr) { die(); } -void pet_expr_access_is_read(){ +isl_bool pet_expr_access_is_read(__isl_keep pet_expr *expr) { die(); } -void pet_expr_access_is_write(){ +isl_bool pet_expr_access_is_write(__isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_tagged_may_read(){ +__isl_give isl_union_map *pet_expr_access_get_tagged_may_read( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_tagged_may_write(){ +__isl_give isl_union_map *pet_expr_access_get_tagged_may_write( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_must_write(){ +__isl_give isl_union_map *pet_expr_access_get_must_write( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_index(){ +__isl_give isl_multi_pw_aff *pet_expr_access_get_index( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_ref_id(){ +__isl_give isl_id *pet_expr_access_get_ref_id(__isl_keep pet_expr *expr) { die(); } -void print_cpu(){ +__isl_give isl_printer *print_cpu(__isl_take isl_printer *p, + struct ppcg_scop *ps, struct ppcg_options *options) { die(); } -void pet_stmt_print_body(){ - die(); -} -void pet_loc_get_start(){ - die(); -} -void pet_loc_get_end(){ - die(); -} -void pet_scop_collect_tagged_may_reads(){ - die(); -} -void pet_scop_collect_may_reads(){ +__isl_give isl_printer *pet_stmt_print_body(struct pet_stmt *stmt, + __isl_take isl_printer *p, __isl_keep isl_id_to_ast_expr *ref2expr) { die(); } -void pet_scop_collect_tagged_may_writes(){ +unsigned pet_loc_get_start(__isl_keep pet_loc *loc) { die(); } -void pet_scop_collect_may_writes(){ +unsigned pet_loc_get_end(__isl_keep pet_loc *loc) { die(); } -void pet_scop_collect_tagged_must_writes(){ +int pet_transform_C_source(isl_ctx *ctx, const char *input, FILE *output, + __isl_give isl_printer *(*transform)(__isl_take isl_printer *p, + __isl_take pet_scop *scop, void *user), void *user) { die(); } -void pet_scop_collect_must_writes(){ +__isl_give isl_printer *pet_scop_print_original(__isl_keep pet_scop *scop, + __isl_take isl_printer *p) { die(); } -void pet_scop_collect_tagged_must_kills(){ +__isl_null pet_scop *pet_scop_free(__isl_take pet_scop *scop) { die(); } -void pet_transform_C_source(){ +__isl_give pet_scop *pet_scop_align_params(__isl_take pet_scop *scop) { die(); } -void pet_scop_print_original(){ +int pet_scop_can_build_ast_exprs(__isl_keep pet_scop *scop) { die(); } -void pet_scop_free(){ +int pet_scop_has_data_dependent_conditions(__isl_keep pet_scop *scop) { die(); } -void pet_scop_align_params(){ +int pet_tree_foreach_expr(__isl_keep pet_tree *tree, + int 
(*fn)(__isl_keep pet_expr *expr, void *user), void *user) { die(); } -void pet_scop_can_build_ast_exprs(){ +int pet_expr_foreach_call_expr(__isl_keep pet_expr *expr, + int (*fn)(__isl_keep pet_expr *expr, void *user), void *user) { die(); } -void pet_scop_has_data_dependent_conditions(){ +int pet_stmt_is_kill(struct pet_stmt *stmt) { die(); } -void pet_tree_foreach_expr(){ +struct isl_args pet_options_args; +const char *ppcg_version(void) { die(); } -void pet_expr_foreach_call_expr(){ +int pet_options_set_encapsulate_dynamic_control(isl_ctx *ctx, int val) { die(); } -void pet_stmt_is_kill(){ +int generate_opencl(isl_ctx *ctx, struct ppcg_options *options, + const char *input, const char *output) { die(); } -void pet_options_args() { +int generate_cpu(isl_ctx *ctx, struct ppcg_options *options, + const char *input, const char *output) { die(); } -void ppcg_print_guarded() { +__isl_give isl_id_to_ast_expr *pet_stmt_build_ast_exprs(struct pet_stmt *stmt, + __isl_keep isl_ast_build *build, + __isl_give isl_multi_pw_aff *(*fn_index)( + __isl_take isl_multi_pw_aff *mpa, __isl_keep isl_id *id, + void *user), void *user_index, + __isl_give isl_ast_expr *(*fn_expr)(__isl_take isl_ast_expr *expr, + __isl_keep isl_id *id, void *user), void *user_expr) { die(); } -void ppcg_version() { +__isl_give isl_union_map *pet_scop_get_tagged_may_reads( + __isl_keep pet_scop *scop) { die(); } -void pet_options_set_encapsulate_dynamic_control() { +__isl_give isl_union_map *pet_scop_get_may_reads(__isl_keep pet_scop *scop) { die(); } -void generate_opencl() { +__isl_give isl_union_map *pet_scop_get_may_writes(__isl_keep pet_scop *scop) { die(); } -void generate_cpu() { +__isl_give isl_union_map *pet_scop_get_must_writes(__isl_keep pet_scop *scop) { die(); } -void pet_stmt_build_ast_exprs() { +__isl_give isl_union_map *pet_scop_get_tagged_may_writes( + __isl_keep pet_scop *scop) { die(); } - void pet_scop_get_tagged_may_reads() { +__isl_give isl_union_map *pet_scop_get_tagged_must_writes( + __isl_keep pet_scop *scop) { die(); } - void pet_scop_get_may_reads() { - die(); -} -void pet_scop_get_may_writes() { - die(); -} -void pet_scop_get_must_writes() { - die(); -} -void pet_scop_get_tagged_may_writes() { - die(); -} -void pet_scop_get_tagged_must_writes() { -die(); -} -void pet_scop_get_must_kills() { +__isl_give isl_union_map *pet_scop_get_must_kills(__isl_keep pet_scop *scop) { die(); } -void pet_scop_get_tagged_must_kills() { +__isl_give isl_union_map *pet_scop_get_tagged_must_kills( + __isl_keep pet_scop *scop) { die(); } -void pet_expr_call_get_name() { +__isl_keep const char *pet_expr_call_get_name(__isl_keep pet_expr *expr) { die(); } -void pet_expr_call_set_name() { +__isl_give pet_expr *pet_expr_call_set_name(__isl_take pet_expr *expr, + __isl_keep const char *name) { die(); } -void pet_expr_get_arg() { +__isl_give pet_expr *pet_expr_get_arg(__isl_keep pet_expr *expr, int pos) { die(); } -void pet_expr_new_cast() { +__isl_give pet_expr *pet_expr_new_cast(const char *type_name, + __isl_take pet_expr *arg) { die(); } -void pet_expr_set_arg() { +__isl_give pet_expr *pet_expr_set_arg(__isl_take pet_expr *expr, int pos, + __isl_take pet_expr *arg) { die(); } -void pet_tree_copy() { +__isl_give pet_tree *pet_tree_copy(__isl_keep pet_tree *tree) { die(); } -void pet_tree_free() { +__isl_null pet_tree *pet_tree_free(__isl_take pet_tree *tree) { die(); } -void pet_tree_map_call_expr() { +__isl_give pet_tree *pet_tree_map_call_expr(__isl_take pet_tree *tree, + __isl_give pet_expr *(*fn)(__isl_take pet_expr *expr, 
void *user), + void *user) { die(); } -void pet_expr_access_get_may_read() { +__isl_give isl_union_map *pet_expr_access_get_may_read( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_may_write() { +__isl_give isl_union_map *pet_expr_access_get_may_write( + __isl_keep pet_expr *expr) { die(); } diff -Nru llvm-toolchain-12-12.0.0/polly/lib/External/ppcg/print.c llvm-toolchain-12-12.0.1/polly/lib/External/ppcg/print.c --- llvm-toolchain-12-12.0.0/polly/lib/External/ppcg/print.c 2020-10-16 21:13:11.000000000 +0000 +++ llvm-toolchain-12-12.0.1/polly/lib/External/ppcg/print.c 2021-07-09 07:05:03.000000000 +0000 @@ -9,6 +9,7 @@ #include #include +#include #include "print.h" #include "util.h"
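
Context for the llvm-cov hunks above: in 12.0.1 the instantiation-group summary merges BranchCoverageInfo by taking the maximum of Covered and NumBranches, the same way RegionCoverage and LineCoverage are merged, instead of summing across instantiations with operator+=. What follows is a minimal, self-contained C++ sketch of the two semantics; it is not the real llvm-cov class. The struct and field names follow the hunk above, while main(), the output, and the instantiation counts (3 of 4 and 4 of 4 branches) are invented purely for illustration.

#include <algorithm>
#include <cstddef>
#include <iostream>

// Simplified stand-in for llvm-cov's BranchCoverageInfo (see the hunk above).
struct BranchCoverageInfo {
  std::size_t Covered;
  std::size_t NumBranches;

  // Pre-12.0.1 summary path: sum counts across instantiation groups.
  BranchCoverageInfo &operator+=(const BranchCoverageInfo &RHS) {
    Covered += RHS.Covered;
    NumBranches += RHS.NumBranches;
    return *this;
  }

  // 12.0.1 behaviour: keep the maximum, matching RegionCoverage/LineCoverage.
  void merge(const BranchCoverageInfo &RHS) {
    Covered = std::max(Covered, RHS.Covered);
    NumBranches = std::max(NumBranches, RHS.NumBranches);
  }
};

int main() {
  // Two hypothetical instantiations of the same function, 4 branches each.
  BranchCoverageInfo A{3, 4}, B{4, 4};

  BranchCoverageInfo Sum = A;
  Sum += B;       // summing: 7 of 8 branches for the group

  BranchCoverageInfo Max = A;
  Max.merge(B);   // max-style merge: 4 of 4 branches for the group

  std::cout << Sum.Covered << "/" << Sum.NumBranches << " vs "
            << Max.Covered << "/" << Max.NumBranches << "\n";
  return 0;
}

In this toy example the summing path reports 7 of 8 branches for the group, while the merge reports 4 of 4; the CoverageSummaryInfo change above switches the instantiation-group summary from the former to the latter.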